1use crate::errors::AppError;
29use crate::retry::AttemptOutcome;
30use secrecy::{ExposeSecret, SecretBox};
31use serde::{Deserialize, Serialize};
32use std::time::Duration;
33
/// Public OpenRouter chat-completions endpoint; the `base_url` a client gets from
/// `OpenRouterChatClient::new`.
const OPENROUTER_CHAT_URL: &str = "https://openrouter.ai/api/v1/chat/completions";

/// Whole-request timeout, in seconds, substituted when the caller passes `0`.
const DEFAULT_TIMEOUT_SECS: u64 = 600;

/// Connect timeout, in seconds, applied to every client regardless of the request timeout.
const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 10;

/// Value sent as `response_format.json_schema.name` with every request.
const SCHEMA_NAME: &str = "enrich_output";
44
45#[derive(Serialize)]
46struct ChatRequest<'a> {
47 model: &'a str,
48 messages: Vec<ChatMessage<'a>>,
49 response_format: ResponseFormat,
50 provider: ProviderPrefs,
51 #[serde(skip_serializing_if = "Option::is_none")]
52 reasoning: Option<ReasoningPrefs>,
53 #[serde(skip_serializing_if = "Option::is_none")]
54 max_tokens: Option<u32>,
55}
56
57#[derive(Serialize)]
58struct ChatMessage<'a> {
59 role: &'a str,
60 content: String,
61}
62
63#[derive(Serialize)]
64struct ResponseFormat {
65 #[serde(rename = "type")]
66 format_type: &'static str,
67 json_schema: JsonSchemaSpec,
68}
69
70#[derive(Serialize)]
71struct JsonSchemaSpec {
72 name: &'static str,
73 strict: bool,
74 schema: serde_json::Value,
75}
76
77#[derive(Serialize)]
78struct ProviderPrefs {
79 require_parameters: bool,
80}
81
82#[derive(Serialize)]
83struct ReasoningPrefs {
84 enabled: bool,
85}
86
87#[derive(Deserialize)]
88struct ChatResponse {
89 #[serde(default)]
90 choices: Vec<Choice>,
91 #[serde(default)]
92 usage: Option<Usage>,
93 #[serde(default)]
98 error: Option<crate::openrouter_http::ApiError>,
99}
100
101#[derive(Deserialize)]
102struct Choice {
103 message: RespMessage,
104 #[serde(default)]
108 finish_reason: Option<String>,
109}
110
111#[derive(Deserialize)]
112struct RespMessage {
113 #[serde(default)]
114 content: Option<String>,
115}
116
117#[derive(Deserialize)]
118struct Usage {
119 #[serde(default)]
120 cost: Option<f64>,
121 #[serde(default)]
124 prompt_tokens: Option<u32>,
125 #[serde(default)]
128 completion_tokens: Option<u32>,
129}
130
131#[derive(Debug)]
138pub struct ChatCompletion {
139 pub value: serde_json::Value,
141 pub cost_usd: f64,
143 pub finish_reason: Option<String>,
145 pub prompt_tokens: Option<u32>,
147 pub completion_tokens: Option<u32>,
149}
150
151#[derive(Debug)]
166pub struct ChatError {
167 pub source: AppError,
169 pub finish_reason: Option<String>,
172 pub prompt_tokens: Option<u32>,
175 pub completion_tokens: Option<u32>,
178 pub retry_class: AttemptOutcome,
181}
182
183impl ChatError {
184 fn new(source: AppError, retry_class: AttemptOutcome) -> Self {
188 Self {
189 source,
190 finish_reason: None,
191 prompt_tokens: None,
192 completion_tokens: None,
193 retry_class,
194 }
195 }
196
197 fn with_diagnostics(
201 source: AppError,
202 finish_reason: Option<String>,
203 prompt_tokens: Option<u32>,
204 completion_tokens: Option<u32>,
205 retry_class: AttemptOutcome,
206 ) -> Self {
207 Self {
208 source,
209 finish_reason,
210 prompt_tokens,
211 completion_tokens,
212 retry_class,
213 }
214 }
215}
216
217impl std::fmt::Display for ChatError {
218 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
219 std::fmt::Display::fmt(&self.source, f)
220 }
221}
222
223impl std::error::Error for ChatError {
224 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
225 Some(&self.source)
226 }
227}
228
229pub struct OpenRouterChatClient {
232 client: reqwest::Client,
233 api_key: SecretBox<String>,
234 model: String,
235 base_url: String,
239}
240
241impl OpenRouterChatClient {
242 pub fn new(
247 api_key: SecretBox<String>,
248 model: String,
249 timeout_secs: u64,
250 ) -> Result<Self, AppError> {
251 let timeout_secs = if timeout_secs == 0 {
252 DEFAULT_TIMEOUT_SECS
253 } else {
254 timeout_secs
255 };
256 let client = reqwest::Client::builder()
257 .timeout(Duration::from_secs(timeout_secs))
258 .connect_timeout(Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS))
259 .user_agent("sqlite-graphrag/1.1.00")
260 .build()
261 .map_err(|e| AppError::Validation(format!("failed to build HTTP client: {e}")))?;
262
263 Ok(Self {
264 client,
265 api_key,
266 model,
267 base_url: OPENROUTER_CHAT_URL.to_string(),
268 })
269 }
270
271 #[cfg(test)]
275 pub fn new_with_url(
276 api_key: SecretBox<String>,
277 model: String,
278 base_url: String,
279 timeout_secs: u64,
280 ) -> Result<Self, AppError> {
281 let mut client = Self::new(api_key, model, timeout_secs)?;
282 client.base_url = base_url;
283 Ok(client)
284 }
285
286 pub fn model(&self) -> &str {
288 &self.model
289 }
290
291 pub async fn complete(
314 &self,
315 system_prompt: &str,
316 input_text: &str,
317 schema_str: &str,
318 max_tokens: Option<u32>,
319 ) -> Result<ChatCompletion, ChatError> {
320 let schema: serde_json::Value = serde_json::from_str(schema_str).map_err(|e| {
324 ChatError::new(
325 AppError::Validation(format!("invalid JSON schema for OpenRouter request: {e}")),
326 AttemptOutcome::HardFailure,
327 )
328 })?;
329
330 let mut current_max_tokens = max_tokens;
331
332 for length_attempt in 0..=crate::constants::ENRICH_MAX_LENGTH_RETRIES {
333 let response = self
334 .complete_one_attempt(&schema, system_prompt, input_text, current_max_tokens)
335 .await?;
336
337 let finish_reason = response
338 .choices
339 .first()
340 .and_then(|c| c.finish_reason.clone());
341 let prompt_tokens = response.usage.as_ref().and_then(|u| u.prompt_tokens);
342 let completion_tokens = response.usage.as_ref().and_then(|u| u.completion_tokens);
343
344 let truncated = finish_reason.as_deref() == Some("length");
345 let retries_left = length_attempt < crate::constants::ENRICH_MAX_LENGTH_RETRIES;
346
347 if truncated && retries_left {
348 let next_max_tokens = grow_max_tokens(current_max_tokens);
349 tracing::warn!(
350 model = %self.model,
351 attempt = length_attempt,
352 previous_max_tokens = ?current_max_tokens,
353 next_max_tokens,
354 "OpenRouter completion truncated (finish_reason=length); \
355 retrying with a larger max_tokens budget"
356 );
357 current_max_tokens = Some(next_max_tokens);
358 continue;
359 }
360
361 if truncated {
362 tracing::warn!(
363 model = %self.model,
364 max_length_retries = crate::constants::ENRICH_MAX_LENGTH_RETRIES,
365 max_tokens = ?current_max_tokens,
366 "OpenRouter completion still truncated after exhausting \
367 max_tokens growth"
368 );
369 }
370
371 return self.finish_completion(
372 response,
373 finish_reason,
374 prompt_tokens,
375 completion_tokens,
376 );
377 }
378
379 unreachable!("loop always returns within ENRICH_MAX_LENGTH_RETRIES + 1 iterations")
380 }
381
382 async fn complete_one_attempt(
387 &self,
388 schema: &serde_json::Value,
389 system_prompt: &str,
390 input_text: &str,
391 max_tokens: Option<u32>,
392 ) -> Result<ChatResponse, ChatError> {
393 let primary = self.build_request(
400 schema.clone(),
401 system_prompt,
402 input_text,
403 max_tokens,
404 Some(ReasoningPrefs { enabled: false }),
405 );
406 match self.execute_with_retry(&primary).await {
407 Ok(r) => Ok(r),
408 Err(first_err) => {
409 if reasoning_disable_rejected(&first_err) {
410 tracing::warn!(
411 model = %self.model,
412 "model rejected reasoning.enabled=false (mandatory); \
413 retrying once with reasoning omitted"
414 );
415 let fallback = self.build_request(
416 schema.clone(),
417 system_prompt,
418 input_text,
419 max_tokens,
420 None,
421 );
422 match self.execute_with_retry(&fallback).await {
423 Ok(r) => Ok(r),
424 Err(_) => Err(first_err),
425 }
426 } else {
427 Err(first_err)
428 }
429 }
430 }
431 }
432
433 fn finish_completion(
449 &self,
450 response: ChatResponse,
451 finish_reason: Option<String>,
452 prompt_tokens: Option<u32>,
453 completion_tokens: Option<u32>,
454 ) -> Result<ChatCompletion, ChatError> {
455 let content = response
456 .choices
457 .into_iter()
458 .next()
459 .and_then(|c| c.message.content)
460 .filter(|c| !c.trim().is_empty())
461 .ok_or_else(|| {
462 AppError::Validation(format!(
463 "model '{}' returned no structured content (incompatible with \
464 structured outputs, or refused the request)",
465 self.model
466 ))
467 })
468 .map_err(|e| {
469 ChatError::with_diagnostics(
470 e,
471 finish_reason.clone(),
472 prompt_tokens,
473 completion_tokens,
474 AttemptOutcome::Transient,
475 )
476 })?;
477
478 let value = crate::json_repair::repair_to_value(&content).map_err(|e| {
484 ChatError::with_diagnostics(
485 AppError::Validation(format!(
486 "model '{}' returned content that could not be parsed even after \
487 JSON repair: {e}",
488 self.model
489 )),
490 finish_reason.clone(),
491 prompt_tokens,
492 completion_tokens,
493 AttemptOutcome::Transient,
494 )
495 })?;
496
497 if !value.is_object() {
505 return Err(ChatError::with_diagnostics(
506 AppError::Validation(format!(
507 "model '{}' returned non-object JSON after repair (got {}); \
508 likely a refusal or malformed structured output",
509 self.model,
510 json_shape_name(&value)
511 )),
512 finish_reason,
513 prompt_tokens,
514 completion_tokens,
515 AttemptOutcome::Transient,
516 ));
517 }
518
519 let cost = response.usage.and_then(|u| u.cost).unwrap_or(0.0);
520
521 Ok(ChatCompletion {
522 value,
523 cost_usd: cost,
524 finish_reason,
525 prompt_tokens,
526 completion_tokens,
527 })
528 }
529
530 fn build_request<'a>(
534 &'a self,
535 schema: serde_json::Value,
536 system_prompt: &str,
537 input_text: &str,
538 max_tokens: Option<u32>,
539 reasoning: Option<ReasoningPrefs>,
540 ) -> ChatRequest<'a> {
541 let mut messages = Vec::with_capacity(2);
542 messages.push(ChatMessage {
543 role: "system",
544 content: system_prompt.to_string(),
545 });
546 if !input_text.is_empty() {
547 messages.push(ChatMessage {
548 role: "user",
549 content: input_text.to_string(),
550 });
551 }
552 ChatRequest {
553 model: &self.model,
554 messages,
555 response_format: ResponseFormat {
556 format_type: "json_schema",
557 json_schema: JsonSchemaSpec {
558 name: SCHEMA_NAME,
559 strict: true,
560 schema,
561 },
562 },
563 provider: ProviderPrefs {
564 require_parameters: true,
565 },
566 reasoning,
567 max_tokens,
568 }
569 }
570
571 async fn execute_with_retry(
577 &self,
578 request: &ChatRequest<'_>,
579 ) -> Result<ChatResponse, ChatError> {
580 let mut last_err: Option<ChatError> = None;
581
582 for attempt in 0..crate::openrouter_http::MAX_RETRIES {
583 let result = self
584 .client
585 .post(&self.base_url)
586 .header(
587 "Authorization",
588 format!("Bearer {}", self.api_key.expose_secret()),
589 )
590 .json(request)
591 .send()
592 .await;
593
594 let resp = match result {
595 Ok(r) => r,
596 Err(e) if e.is_timeout() => {
597 return Err(ChatError::new(
598 AppError::Validation("OpenRouter chat request timed out".into()),
599 AttemptOutcome::Transient,
600 ));
601 }
602 Err(e) => {
603 last_err = Some(ChatError::new(
604 AppError::Validation(format!("HTTP request failed: {e}")),
605 AttemptOutcome::Transient,
606 ));
607 crate::openrouter_http::backoff(attempt).await;
608 continue;
609 }
610 };
611
612 let status = resp.status();
613
614 if status.is_success() {
615 let body = resp.text().await.map_err(|e| {
616 ChatError::new(
617 AppError::Validation(format!("failed to read response body: {e}")),
618 AttemptOutcome::Transient,
619 )
620 })?;
621 match serde_json::from_str::<ChatResponse>(&body) {
622 Ok(parsed) => {
623 if let Some(api_err) = parsed.error {
628 let retry_class =
629 crate::openrouter_http::provider_error_retry_class(&api_err);
630 return Err(ChatError::new(
631 AppError::ProviderError {
632 code: api_err.code_string(),
633 message: api_err.message,
634 },
635 retry_class,
636 ));
637 }
638 return Ok(parsed);
639 }
640 Err(e) => {
641 tracing::warn!(
642 attempt,
643 body_len = body.len(),
644 "HTTP 200 but parse failed (retrying): {e}"
645 );
646 last_err = Some(ChatError::new(
647 AppError::Validation(format!("failed to parse chat response: {e}")),
648 AttemptOutcome::Transient,
649 ));
650 crate::openrouter_http::backoff(attempt).await;
651 continue;
652 }
653 }
654 }
655
656 if status.as_u16() == 401 {
657 return Err(ChatError::new(
658 AppError::Validation("invalid OpenRouter API key (HTTP 401)".into()),
659 AttemptOutcome::HardFailure,
660 ));
661 }
662
663 if status.as_u16() == 400 || status.as_u16() == 404 {
664 let body = resp.text().await.unwrap_or_default();
665 return Err(ChatError::new(
666 AppError::Validation(format!(
667 "OpenRouter returned {status} for model '{}': {body}",
668 self.model
669 )),
670 AttemptOutcome::HardFailure,
671 ));
672 }
673
674 if status.as_u16() == 429 {
675 let retry_after = resp
676 .headers()
677 .get("retry-after")
678 .and_then(|v| v.to_str().ok())
679 .and_then(|v| v.parse::<u64>().ok())
680 .unwrap_or(2);
681 tracing::warn!(
682 attempt,
683 retry_after_secs = retry_after,
684 "OpenRouter rate limited, waiting"
685 );
686 last_err = Some(ChatError::new(
691 AppError::RateLimited {
692 detail: format!("OpenRouter HTTP 429 (retry-after {retry_after}s)"),
693 },
694 AttemptOutcome::Transient,
695 ));
696 tokio::time::sleep(Duration::from_secs(retry_after)).await;
697 continue;
698 }
699
700 if status.is_server_error() {
701 tracing::warn!(attempt, status = %status, "OpenRouter server error, retrying");
702 last_err = Some(ChatError::new(
703 AppError::Validation(format!("OpenRouter server error: {status}")),
704 AttemptOutcome::Transient,
705 ));
706 crate::openrouter_http::backoff(attempt).await;
707 continue;
708 }
709
710 let body = resp.text().await.unwrap_or_default();
711 return Err(ChatError::new(
712 AppError::Validation(format!("unexpected HTTP {status}: {body}")),
713 crate::openrouter_http::status_retry_class(status),
714 ));
715 }
716
717 Err(last_err.unwrap_or_else(|| {
722 ChatError::new(
723 AppError::Validation("max retries exceeded for OpenRouter chat request".into()),
724 AttemptOutcome::Transient,
725 )
726 }))
727 }
728}
729
730fn grow_max_tokens(current: Option<u32>) -> u32 {
737 let base = current.unwrap_or(crate::constants::ENRICH_INITIAL_MAX_TOKENS);
738 base.saturating_mul(crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR)
739 .min(crate::constants::ENRICH_MAX_TOKENS_CEILING)
740}
741
742fn reasoning_disable_rejected(err: &ChatError) -> bool {
752 let msg = err.source.to_string().to_lowercase();
753 msg.contains("400") && msg.contains("reasoning")
754}
755
756fn json_shape_name(value: &serde_json::Value) -> &'static str {
759 match value {
760 serde_json::Value::Null => "null",
761 serde_json::Value::Bool(_) => "boolean",
762 serde_json::Value::Number(_) => "number",
763 serde_json::Value::String(_) => "string",
764 serde_json::Value::Array(_) => "array",
765 serde_json::Value::Object(_) => "object",
766 }
767}
768
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use wiremock::matchers::{body_partial_json, method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Minimal schema accepted by `complete`.
    const TEST_SCHEMA: &str = r#"{"type":"object"}"#;

    /// Dummy API key.
    fn key() -> SecretBox<String> {
        SecretBox::new(Box::new("test-key".to_string()))
    }

    /// Success body with `finish_reason = "stop"`.
    fn success_body(content: &str, cost: Option<f64>) -> serde_json::Value {
        success_body_with_finish(content, cost, "stop")
    }

    /// Success body with one choice; `usage.cost` is added only when `cost` is given.
    fn success_body_with_finish(
        content: &str,
        cost: Option<f64>,
        finish_reason: &str,
    ) -> serde_json::Value {
        let mut body = json!({
            "choices": [{ "message": { "content": content }, "finish_reason": finish_reason }]
        });
        if let Some(c) = cost {
            body["usage"] = json!({ "cost": c });
        }
        body
    }

    /// Client pointed at `server` with a 30 s request timeout.
    async fn client_for(server: &MockServer, model: &str) -> OpenRouterChatClient {
        OpenRouterChatClient::new_with_url(
            key(),
            model.to_string(),
            format!("{}/chat/completions", server.uri()),
            30,
        )
        .expect("test client builds")
    }

    /// The constructor keeps the model it was given.
    #[test]
    fn new_builds_client_and_binds_model() {
        let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
            .expect("client builds");
        assert_eq!(client.model(), "z-ai/glm-5.2");
    }

    /// Without an override the client targets the public endpoint.
    #[test]
    fn new_defaults_base_url_to_public_endpoint() {
        let client = OpenRouterChatClient::new(key(), "z-ai/glm-5.2".to_string(), 30)
            .expect("client builds");
        assert_eq!(client.base_url, OPENROUTER_CHAT_URL);
    }

    /// Wire format: renamed `type` key, strict schema, provider flag, reasoning off,
    /// and no `max_tokens` key when it is `None`.
    #[test]
    fn request_serializes_with_strict_schema_and_disabled_reasoning() {
        let request = ChatRequest {
            model: "deepseek/deepseek-v4-flash",
            messages: vec![ChatMessage {
                role: "system",
                content: "extract".to_string(),
            }],
            response_format: ResponseFormat {
                format_type: "json_schema",
                json_schema: JsonSchemaSpec {
                    name: SCHEMA_NAME,
                    strict: true,
                    schema: serde_json::json!({"type": "object"}),
                },
            },
            provider: ProviderPrefs {
                require_parameters: true,
            },
            reasoning: Some(ReasoningPrefs { enabled: false }),
            max_tokens: None,
        };
        let json = serde_json::to_value(&request).expect("serializes");
        assert_eq!(json["response_format"]["type"], "json_schema");
        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
        assert_eq!(json["provider"]["require_parameters"], true);
        assert_eq!(json["reasoning"]["enabled"], false);
        assert!(json.get("max_tokens").is_none());
    }

    /// With no current budget, growth starts from the initial default.
    #[test]
    fn grow_max_tokens_uses_initial_default_when_current_is_none() {
        assert_eq!(
            grow_max_tokens(None),
            crate::constants::ENRICH_INITIAL_MAX_TOKENS
                * crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR
        );
    }

    /// Growth never exceeds the ceiling, and the multiplication cannot overflow.
    #[test]
    fn grow_max_tokens_caps_at_ceiling() {
        assert_eq!(
            grow_max_tokens(Some(crate::constants::ENRICH_MAX_TOKENS_CEILING)),
            crate::constants::ENRICH_MAX_TOKENS_CEILING
        );
        assert_eq!(
            grow_max_tokens(Some(u32::MAX)),
            crate::constants::ENRICH_MAX_TOKENS_CEILING
        );
    }

    /// Happy path: the request body matches the expected shape and the reply is parsed.
    #[tokio::test]
    async fn complete_sends_wellformed_request_and_parses_content() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .and(path("/chat/completions"))
            .and(body_partial_json(json!({
                "model": "deepseek/deepseek-v4-flash",
                "response_format": {
                    "type": "json_schema",
                    "json_schema": { "name": "enrich_output", "strict": true }
                },
                "provider": { "require_parameters": true },
                "reasoning": { "enabled": false }
            })))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0023),
            )))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("completion succeeds");

        assert_eq!(
            completion.value,
            json!({"entities": [], "relationships": []})
        );
        assert!((completion.cost_usd - 0.0023).abs() < f64::EPSILON);
        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));
    }

    /// A reply without a `usage` block reports a cost of zero.
    #[tokio::test]
    async fn complete_defaults_cost_to_zero_when_usage_absent() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"entities":[]}"#, None)),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let completion = client
            .complete("system", "", TEST_SCHEMA, Some(4096))
            .await
            .expect("completion succeeds");
        assert_eq!(completion.cost_usd, 0.0);
    }

    /// One 429 with `retry-after: 1`, then success on the next attempt.
    #[tokio::test]
    async fn complete_retries_on_429_honouring_retry_after() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(429).insert_header("retry-after", "1"))
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":true}"#, Some(0.0))),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m3").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("retried completion succeeds");
        assert_eq!(completion.value, json!({"ok": true}));
    }

    /// One 503, then success on the next attempt.
    #[tokio::test]
    async fn complete_retries_on_5xx_with_backoff() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(503))
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "openai/gpt-oss-120b").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("retried completion succeeds");
        assert_eq!(completion.value, json!({"ok": 1}));
    }

    /// A 401 is reported after exactly one request and classified as permanent.
    #[tokio::test]
    async fn complete_401_is_permanent_without_retry() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(401))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("401 is an error");
        assert!(err.to_string().contains("401"), "got: {err}");
        assert_eq!(err.retry_class, AttemptOutcome::HardFailure);
    }

    /// A 400 surfaces status, model and response body, without a retry.
    #[tokio::test]
    async fn complete_400_returns_body_and_model_without_retry() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(400).set_body_string("schema not supported"))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "xiaomi/mimo-v2.5").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("400 is an error");
        let msg = err.to_string();
        assert!(msg.contains("400"), "got: {msg}");
        assert!(msg.contains("xiaomi/mimo-v2.5"), "got: {msg}");
        assert!(msg.contains("schema not supported"), "got: {msg}");
        assert_eq!(err.retry_class, AttemptOutcome::HardFailure);
    }

    /// An empty `choices` array is a transient error that names the model.
    #[tokio::test]
    async fn complete_empty_choices_errors_citing_model() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({ "choices": [] })))
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("empty choices is an error");
        let msg = err.to_string();
        assert!(msg.contains("minimax/minimax-m2.7"), "got: {msg}");
        assert!(msg.contains("no structured content"), "got: {msg}");
        assert_eq!(err.finish_reason, None);
        assert_eq!(
            err.retry_class,
            AttemptOutcome::Transient,
            "no-content is a model hiccup, not a permanent rejection"
        );
    }

    /// Whitespace-only content is treated the same as missing content.
    #[tokio::test]
    async fn complete_empty_content_errors() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(" ", Some(0.0))))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2:nitro").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("blank content is an error");
        assert!(
            err.to_string().contains("no structured content"),
            "got: {err}"
        );
    }

    /// Plain prose content is rejected with the non-object error, naming the model.
    #[tokio::test]
    async fn complete_non_json_content_errors_as_incompatible() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_json(success_body("this is not json", Some(0.0))),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "google/gemini-3.1-flash-lite").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("non-json content is an error");
        let msg = err.to_string();
        assert!(msg.contains("non-object JSON after repair"), "got: {msg}");
        assert!(msg.contains("google/gemini-3.1-flash-lite"), "got: {msg}");
    }

    /// An object wrapped in a markdown code fence is recovered by JSON repair.
    #[tokio::test]
    async fn complete_repairs_markdown_fenced_object() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                "```json\n{\"entities\":[\"rust\"],\"relationships\":[]}\n```",
                Some(0.0),
            )))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("fenced object is repaired");
        assert_eq!(
            completion.value,
            json!({"entities": ["rust"], "relationships": []})
        );
    }

    /// A malformed schema fails as a permanent error; the URL used here is not a mock server.
    #[tokio::test]
    async fn complete_rejects_invalid_schema_before_network() {
        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            "http://127.0.0.1:1/chat/completions".to_string(),
            30,
        )
        .expect("client builds");
        let err = client
            .complete("system", "input", "{not valid json", None)
            .await
            .expect_err("invalid schema is rejected");
        assert!(
            err.to_string().contains("invalid JSON schema"),
            "got: {err}"
        );
        assert_eq!(
            err.retry_class,
            AttemptOutcome::HardFailure,
            "a malformed schema is a permanent caller error"
        );
    }

    /// A 400 mentioning reasoning triggers exactly one fallback request that omits
    /// the `reasoning` field.
    #[tokio::test]
    async fn complete_retries_with_reasoning_omitted_when_mandatory() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(400).set_body_string(
                    "reasoning is mandatory for this model and cannot be disabled",
                ),
            )
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(success_body(
                r#"{"entities":[],"relationships":[]}"#,
                Some(0.0),
            )))
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "minimax/minimax-m2.7").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("fallback completion succeeds");
        assert_eq!(
            completion.value,
            json!({"entities": [], "relationships": []})
        );

        // Inspect what was actually sent: primary first, fallback second.
        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(requests.len(), 2, "expected primary + fallback requests");
        let first: serde_json::Value =
            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
        let second: serde_json::Value =
            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
        assert_eq!(
            first["reasoning"]["enabled"],
            json!(false),
            "primary request must send reasoning.enabled=false"
        );
        assert!(
            second.get("reasoning").is_none(),
            "fallback request must omit the reasoning field, got: {second}"
        );
    }

    /// A 2 s server delay against a 1 s client timeout yields the timeout error.
    #[tokio::test]
    async fn complete_honours_configured_timeout() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_delay(std::time::Duration::from_secs(2))
                    .set_body_json(success_body(r#"{"ok":1}"#, Some(0.0))),
            )
            .mount(&server)
            .await;

        let client = OpenRouterChatClient::new_with_url(
            key(),
            "z-ai/glm-5.2".to_string(),
            format!("{}/chat/completions", server.uri()),
            1,
        )
        .expect("client builds");
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("request exceeds the 1s timeout");
        assert!(err.to_string().contains("timed out"), "got: {err}");
    }

    /// An `error` object inside an HTTP 200 body surfaces as the provider's own error.
    #[tokio::test]
    async fn complete_surfaces_provider_error_in_200_body() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "error": { "code": 400, "message": "context length exceeded" }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("provider error must surface");
        let msg = err.to_string();
        assert!(msg.contains("context length exceeded"), "got: {msg}");
        assert!(
            !msg.contains("no structured content"),
            "must not mask as empty choices: {msg}"
        );
        assert!(
            !msg.contains("missing field"),
            "must not mask as a missing field: {msg}"
        );
        assert_eq!(
            err.retry_class,
            AttemptOutcome::HardFailure,
            "code 400 (context length exceeded) is a permanent provider rejection"
        );
    }

    /// A provider error with code 429 inside a 200 body is classified as transient.
    #[tokio::test]
    async fn complete_classifies_provider_error_429_code_as_transient() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "error": { "code": 429, "message": "rate limited" }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("provider rate-limit error must surface");
        assert_eq!(err.retry_class, AttemptOutcome::Transient);
    }

    /// A server that always answers 503 ends in a transient error.
    #[tokio::test]
    async fn complete_classifies_exhausted_5xx_retries_as_transient() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(503))
            .mount(&server)
            .await;

        let client = client_for(&server, "openai/gpt-oss-120b").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect_err("persistent 5xx exhausts retries");
        assert_eq!(err.retry_class, AttemptOutcome::Transient);
    }

    /// A truncated first reply triggers one retry with a larger `max_tokens`.
    #[tokio::test]
    async fn complete_regrows_max_tokens_and_retries_on_length_truncation() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"{"entities":["trunc"#,
                    Some(0.001),
                    "length",
                )),
            )
            .up_to_n_times(1)
            .expect(1)
            .mount(&server)
            .await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"{"entities":["rust"],"relationships":[]}"#,
                    Some(0.002),
                    "stop",
                )),
            )
            .expect(1)
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash:nitro").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, Some(64))
            .await
            .expect("second attempt with grown max_tokens succeeds");

        assert_eq!(
            completion.value,
            json!({"entities": ["rust"], "relationships": []})
        );
        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));

        // The second request must carry the grown budget.
        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(requests.len(), 2, "expected exactly one regrowth retry");
        let first: serde_json::Value =
            serde_json::from_slice(&requests[0].body).expect("first request body is JSON");
        let second: serde_json::Value =
            serde_json::from_slice(&requests[1].body).expect("second request body is JSON");
        assert_eq!(first["max_tokens"], json!(64));
        assert_eq!(
            second["max_tokens"],
            json!(64 * crate::constants::ENRICH_MAX_TOKENS_GROWTH_FACTOR),
            "max_tokens must grow by ENRICH_MAX_TOKENS_GROWTH_FACTOR before the retry"
        );
    }

    /// Finish reason and token counts from the response are copied into the result.
    #[tokio::test]
    async fn complete_captures_finish_reason_and_tokens_on_success() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(ResponseTemplate::new(200).set_body_json(json!({
                "choices": [{
                    "message": { "content": r#"{"ok":true}"# },
                    "finish_reason": "stop"
                }],
                "usage": { "cost": 0.001, "prompt_tokens": 120, "completion_tokens": 30 }
            })))
            .mount(&server)
            .await;

        let client = client_for(&server, "z-ai/glm-5.2").await;
        let completion = client
            .complete("system", "input", TEST_SCHEMA, None)
            .await
            .expect("completion succeeds");

        assert_eq!(completion.finish_reason.as_deref(), Some("stop"));
        assert_eq!(completion.prompt_tokens, Some(120));
        assert_eq!(completion.completion_tokens, Some(30));
    }

    /// A reply that stays truncated uses every length retry and then fails as transient.
    #[tokio::test]
    async fn complete_gives_up_after_exhausting_length_retries() {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .respond_with(
                ResponseTemplate::new(200).set_body_json(success_body_with_finish(
                    r#"[1, 2, 3"#,
                    Some(0.0),
                    "length",
                )),
            )
            .mount(&server)
            .await;

        let client = client_for(&server, "deepseek/deepseek-v4-flash:nitro").await;
        let err = client
            .complete("system", "input", TEST_SCHEMA, Some(64))
            .await
            .expect_err("exhausted length retries must fail");
        assert_eq!(err.finish_reason.as_deref(), Some("length"));
        assert_eq!(
            err.retry_class,
            AttemptOutcome::Transient,
            "a repeatedly truncated response is a bounded-retry hiccup, not permanent"
        );

        let requests = server
            .received_requests()
            .await
            .expect("request recording is enabled");
        assert_eq!(
            requests.len() as u32,
            crate::constants::ENRICH_MAX_LENGTH_RETRIES + 1,
            "expected the primary attempt plus every regrowth retry"
        );
    }
}