1use crate::llm::attachments::{request_has_attachments, validate_request_attachments};
11use crate::llm::{
12 ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, LlmProvider, StopReason,
13 StreamBox, StreamDelta, ThinkingConfig, ThinkingMode, Usage,
14};
15use anyhow::Result;
16use async_trait::async_trait;
17use futures::StreamExt;
18use reqwest::StatusCode;
19use serde::de::Error as _;
20use serde::{Deserialize, Serialize};
21
22use super::openai_responses::OpenAIResponsesProvider;
23
/// OpenAI's official API root; used when no custom base URL is supplied.
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
25
/// Models that can only be served through the Responses API.
///
/// NOTE(review): only `gpt-5.2-codex` is routed here while `gpt-5.3-codex`
/// also exists in this file — confirm whether newer codex models should be
/// included as well.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
30
/// Returns `true` when `base_url` points at OpenAI's official API host.
///
/// `DEFAULT_BASE_URL` (`https://api.openai.com/v1`) itself contains
/// `api.openai.com`, so the substring test alone covers the default URL; the
/// previous explicit equality check against it was redundant and has been
/// removed.
///
/// NOTE(review): a substring match also accepts URLs that merely embed
/// `api.openai.com` (e.g. gateways/proxies) — confirm this looseness is
/// intended.
fn is_official_openai_base_url(base_url: &str) -> bool {
    base_url.contains("api.openai.com")
}
34
35fn request_is_agentic(request: &ChatRequest) -> bool {
36 request
37 .tools
38 .as_ref()
39 .is_some_and(|tools| !tools.is_empty()) || request.messages.iter().any(|message| {
40 matches!(
41 &message.content,
42 Content::Blocks(blocks)
43 if blocks.iter().any(|block| {
44 matches!(block, ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. })
45 })
46 )
47 })
48}
49
50fn should_use_responses_api(base_url: &str, model: &str, request: &ChatRequest) -> bool {
51 requires_responses_api(model)
52 || request_has_attachments(request)
53 || (is_official_openai_base_url(base_url) && request_is_agentic(request))
54}
55
// --- OpenAI model identifiers ---

/// Flagship GPT-5.4 model.
pub const MODEL_GPT54: &str = "gpt-5.4";

/// Latest codex model (see `codex()` factory).
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

/// GPT-5.2 family.
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
/// Responses-API-only model (see `requires_responses_api`).
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

/// GPT-5 family.
pub const MODEL_GPT5: &str = "gpt-5";
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

/// Reasoning ("o"-series) models.
pub const MODEL_O3: &str = "o3";
pub const MODEL_O3_MINI: &str = "o3-mini";
pub const MODEL_O4_MINI: &str = "o4-mini";
pub const MODEL_O1: &str = "o1";
pub const MODEL_O1_MINI: &str = "o1-mini";

/// GPT-4.1 family.
pub const MODEL_GPT41: &str = "gpt-4.1";
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

/// GPT-4o family.
pub const MODEL_GPT4O: &str = "gpt-4o";
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// --- OpenAI-compatible third-party endpoints and their models ---

pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
pub const MODEL_ZAI_GLM5: &str = "glm-5";
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
97
/// Chat-completions client for OpenAI and OpenAI-compatible endpoints
/// (Moonshot/Kimi, Z.AI, MiniMax, local servers, ...).
#[derive(Clone)]
pub struct OpenAIProvider {
    // Reused across requests; reqwest clients pool connections internally.
    client: reqwest::Client,
    // Bearer token; an empty string suppresses the Authorization header
    // entirely (see `apply_headers`).
    api_key: String,
    // Model identifier sent in the request body.
    model: String,
    // API root without trailing slash, e.g. `https://api.openai.com/v1`.
    base_url: String,
    // Optional reasoning/thinking configuration mapped to `reasoning.effort`.
    thinking: Option<ThinkingConfig>,
    // Extra headers appended to every request, in order.
    extra_headers: Vec<(String, String)>,
}
112
impl OpenAIProvider {
    /// Creates a provider for the official OpenAI endpoint.
    #[must_use]
    pub fn new(api_key: String, model: String) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            model,
            base_url: DEFAULT_BASE_URL.to_owned(),
            thinking: None,
            extra_headers: Vec::new(),
        }
    }

    /// Creates a provider for an OpenAI-compatible endpoint at `base_url`
    /// (expected without a trailing slash; paths are appended verbatim).
    #[must_use]
    pub fn with_base_url(api_key: String, model: String, base_url: String) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            model,
            base_url,
            thinking: None,
            extra_headers: Vec::new(),
        }
    }

    /// Moonshot (Kimi) endpoint with an arbitrary model.
    #[must_use]
    pub fn kimi(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
    }

    /// Moonshot endpoint preset for `kimi-k2.5`.
    #[must_use]
    pub fn kimi_k2_5(api_key: String) -> Self {
        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
    }

    /// Moonshot endpoint preset for `kimi-k2-thinking`.
    #[must_use]
    pub fn kimi_k2_thinking(api_key: String) -> Self {
        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
    }

    /// Z.AI endpoint with an arbitrary model.
    #[must_use]
    pub fn zai(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
    }

    /// Z.AI endpoint preset for `glm-5`.
    #[must_use]
    pub fn zai_glm5(api_key: String) -> Self {
        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
    }

    /// MiniMax endpoint with an arbitrary model.
    #[must_use]
    pub fn minimax(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
    }

    /// MiniMax endpoint preset for `MiniMax-M2.5`.
    #[must_use]
    pub fn minimax_m2_5(api_key: String) -> Self {
        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
    }

    /// Official endpoint preset for `gpt-5.2-instant`.
    #[must_use]
    pub fn gpt52_instant(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
    }

    /// Official endpoint preset for `gpt-5.4`.
    #[must_use]
    pub fn gpt54(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT54.to_owned())
    }

    /// Official endpoint preset for `gpt-5.3-codex`.
    #[must_use]
    pub fn gpt53_codex(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
    }

    /// Official endpoint preset for `gpt-5.2-thinking`.
    #[must_use]
    pub fn gpt52_thinking(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
    }

    /// Official endpoint preset for `gpt-5.2-pro`.
    #[must_use]
    pub fn gpt52_pro(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
    }

    /// Alias for the latest codex model (currently `gpt-5.3-codex`).
    #[must_use]
    pub fn codex(api_key: String) -> Self {
        Self::gpt53_codex(api_key)
    }

    /// Official endpoint preset for `gpt-5`.
    #[must_use]
    pub fn gpt5(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5.to_owned())
    }

    /// Official endpoint preset for `gpt-5-mini`.
    #[must_use]
    pub fn gpt5_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
    }

    /// Official endpoint preset for `gpt-5-nano`.
    #[must_use]
    pub fn gpt5_nano(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
    }

    /// Official endpoint preset for `o3`.
    #[must_use]
    pub fn o3(api_key: String) -> Self {
        Self::new(api_key, MODEL_O3.to_owned())
    }

    /// Official endpoint preset for `o3-mini`.
    #[must_use]
    pub fn o3_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O3_MINI.to_owned())
    }

    /// Official endpoint preset for `o4-mini`.
    #[must_use]
    pub fn o4_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O4_MINI.to_owned())
    }

    /// Official endpoint preset for `o1`.
    #[must_use]
    pub fn o1(api_key: String) -> Self {
        Self::new(api_key, MODEL_O1.to_owned())
    }

    /// Official endpoint preset for `o1-mini`.
    #[must_use]
    pub fn o1_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O1_MINI.to_owned())
    }

    /// Official endpoint preset for `gpt-4.1`.
    #[must_use]
    pub fn gpt41(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT41.to_owned())
    }

    /// Official endpoint preset for `gpt-4.1-mini`.
    #[must_use]
    pub fn gpt41_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
    }

    /// Official endpoint preset for `gpt-4o`.
    #[must_use]
    pub fn gpt4o(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT4O.to_owned())
    }

    /// Official endpoint preset for `gpt-4o-mini`.
    #[must_use]
    pub fn gpt4o_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
    }

    /// Builder-style setter for the thinking/reasoning configuration.
    #[must_use]
    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
        self.thinking = Some(thinking);
        self
    }

    /// Builder-style setter replacing the extra headers sent with every request.
    #[must_use]
    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
        self.extra_headers = headers;
        self
    }

    /// Applies authentication and configured extra headers to `builder`.
    ///
    /// An empty API key sends no `Authorization` header (useful for local
    /// servers); extra headers are appended in configuration order.
    fn apply_headers(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
        let builder = if self.api_key.is_empty() {
            builder
        } else {
            builder.header("Authorization", format!("Bearer {}", self.api_key))
        };
        self.extra_headers
            .iter()
            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
    }
}
317
318#[async_trait]
319impl LlmProvider for OpenAIProvider {
320 async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
321 if should_use_responses_api(&self.base_url, &self.model, &request) {
323 let mut responses_provider = OpenAIResponsesProvider::with_base_url(
324 self.api_key.clone(),
325 self.model.clone(),
326 self.base_url.clone(),
327 );
328 if let Some(thinking) = self.thinking.clone() {
329 responses_provider = responses_provider.with_thinking(thinking);
330 }
331 return responses_provider.chat(request).await;
332 }
333
334 let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
335 Ok(thinking) => thinking,
336 Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
337 };
338 if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
339 return Ok(ChatOutcome::InvalidRequest(error.to_string()));
340 }
341 let reasoning = build_api_reasoning(thinking_config.as_ref());
342 let messages = build_api_messages(&request);
343 let tools: Option<Vec<ApiTool>> = request
344 .tools
345 .map(|ts| ts.into_iter().map(convert_tool).collect());
346
347 let api_request = build_api_chat_request(
348 &self.model,
349 &messages,
350 request.max_tokens,
351 tools.as_deref(),
352 reasoning,
353 use_max_tokens_alias(&self.base_url),
354 );
355
356 log::debug!(
357 "OpenAI LLM request model={} max_tokens={}",
358 self.model,
359 request.max_tokens
360 );
361
362 let builder = self
363 .client
364 .post(format!("{}/chat/completions", self.base_url))
365 .header("Content-Type", "application/json");
366 let response = self
367 .apply_headers(builder)
368 .json(&api_request)
369 .send()
370 .await
371 .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
372
373 let status = response.status();
374 let bytes = response
375 .bytes()
376 .await
377 .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
378
379 log::debug!(
380 "OpenAI LLM response status={} body_len={}",
381 status,
382 bytes.len()
383 );
384
385 if status == StatusCode::TOO_MANY_REQUESTS {
386 return Ok(ChatOutcome::RateLimited);
387 }
388
389 if status.is_server_error() {
390 let body = String::from_utf8_lossy(&bytes);
391 log::error!("OpenAI server error status={status} body={body}");
392 return Ok(ChatOutcome::ServerError(body.into_owned()));
393 }
394
395 if status.is_client_error() {
396 let body = String::from_utf8_lossy(&bytes);
397 log::warn!("OpenAI client error status={status} body={body}");
398 return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
399 }
400
401 let api_response: ApiChatResponse = serde_json::from_slice(&bytes)
402 .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
403
404 let choice = api_response
405 .choices
406 .into_iter()
407 .next()
408 .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
409
410 let content = build_content_blocks(&choice.message);
411
412 let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
413
414 Ok(ChatOutcome::Success(ChatResponse {
415 id: api_response.id,
416 content,
417 model: api_response.model,
418 stop_reason,
419 usage: Usage {
420 input_tokens: api_response.usage.prompt_tokens,
421 output_tokens: api_response.usage.completion_tokens,
422 cached_input_tokens: api_response
423 .usage
424 .prompt_tokens_details
425 .as_ref()
426 .map_or(0, |details| details.cached_tokens),
427 },
428 }))
429 }
430
431 #[allow(clippy::too_many_lines)]
432 fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
433 if should_use_responses_api(&self.base_url, &self.model, &request) {
435 let api_key = self.api_key.clone();
436 let model = self.model.clone();
437 let base_url = self.base_url.clone();
438 let thinking = self.thinking.clone();
439 return Box::pin(async_stream::stream! {
440 let mut responses_provider =
441 OpenAIResponsesProvider::with_base_url(api_key, model, base_url);
442 if let Some(thinking) = thinking {
443 responses_provider = responses_provider.with_thinking(thinking);
444 }
445 let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
446 while let Some(item) = futures::StreamExt::next(&mut stream).await {
447 yield item;
448 }
449 });
450 }
451
452 Box::pin(async_stream::stream! {
453 let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
454 Ok(thinking) => thinking,
455 Err(error) => {
456 yield Ok(StreamDelta::Error {
457 message: error.to_string(),
458 recoverable: false,
459 });
460 return;
461 }
462 };
463 if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
464 yield Ok(StreamDelta::Error {
465 message: error.to_string(),
466 recoverable: false,
467 });
468 return;
469 }
470 let reasoning = build_api_reasoning(thinking_config.as_ref());
471 let messages = build_api_messages(&request);
472 let tools: Option<Vec<ApiTool>> = request
473 .tools
474 .map(|ts| ts.into_iter().map(convert_tool).collect());
475
476 let api_request = build_api_chat_request_streaming(
477 &self.model,
478 &messages,
479 request.max_tokens,
480 tools.as_deref(),
481 reasoning,
482 use_max_tokens_alias(&self.base_url),
483 use_stream_usage_options(&self.base_url),
484 );
485
486 log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
487
488 let stream_builder = self.client
489 .post(format!("{}/chat/completions", self.base_url))
490 .header("Content-Type", "application/json");
491 let Ok(response) = self
492 .apply_headers(stream_builder)
493 .json(&api_request)
494 .send()
495 .await
496 else {
497 yield Err(anyhow::anyhow!("request failed"));
498 return;
499 };
500
501 let status = response.status();
502
503 if !status.is_success() {
504 let body = response.text().await.unwrap_or_default();
505 let (recoverable, level) = if status == StatusCode::TOO_MANY_REQUESTS {
506 (true, "rate_limit")
507 } else if status.is_server_error() {
508 (true, "server_error")
509 } else {
510 (false, "client_error")
511 };
512 log::warn!("OpenAI error status={status} body={body} kind={level}");
513 yield Ok(StreamDelta::Error { message: body, recoverable });
514 return;
515 }
516
517 let mut tool_calls: std::collections::HashMap<usize, ToolCallAccumulator> =
519 std::collections::HashMap::new();
520 let mut usage: Option<Usage> = None;
521 let mut buffer = String::new();
522 let mut stream = response.bytes_stream();
523
524 while let Some(chunk_result) = stream.next().await {
525 let Ok(chunk) = chunk_result else {
526 yield Err(anyhow::anyhow!("stream error: {}", chunk_result.unwrap_err()));
527 return;
528 };
529 buffer.push_str(&String::from_utf8_lossy(&chunk));
530
531 while let Some(pos) = buffer.find('\n') {
532 let line = buffer[..pos].trim().to_string();
533 buffer = buffer[pos + 1..].to_string();
534 if line.is_empty() { continue; }
535 let Some(data) = line.strip_prefix("data: ") else { continue; };
536
537 for result in process_sse_data(data) {
538 match result {
539 SseProcessResult::TextDelta(c) => yield Ok(StreamDelta::TextDelta { delta: c, block_index: 0 }),
540 SseProcessResult::ToolCallUpdate { index, id, name, arguments } => apply_tool_call_update(&mut tool_calls, index, id, name, arguments),
541 SseProcessResult::Usage(u) => usage = Some(u),
542 SseProcessResult::Done(sr) => {
543 for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
544 return;
545 }
546 SseProcessResult::Sentinel => {
547 let sr = if tool_calls.is_empty() { StopReason::EndTurn } else { StopReason::ToolUse };
548 for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
549 return;
550 }
551 }
552 }
553 }
554 }
555
556 for delta in build_stream_end_deltas(&tool_calls, usage, StopReason::EndTurn) {
558 yield Ok(delta);
559 }
560 })
561 }
562
563 fn model(&self) -> &str {
564 &self.model
565 }
566
567 fn provider(&self) -> &'static str {
568 "openai"
569 }
570
571 fn configured_thinking(&self) -> Option<&ThinkingConfig> {
572 self.thinking.as_ref()
573 }
574}
575
576fn apply_tool_call_update(
578 tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
579 index: usize,
580 id: Option<String>,
581 name: Option<String>,
582 arguments: Option<String>,
583) {
584 let entry = tool_calls
585 .entry(index)
586 .or_insert_with(|| ToolCallAccumulator {
587 id: String::new(),
588 name: String::new(),
589 arguments: String::new(),
590 });
591 if let Some(id) = id {
592 entry.id = id;
593 }
594 if let Some(name) = name {
595 entry.name = name;
596 }
597 if let Some(args) = arguments {
598 entry.arguments.push_str(&args);
599 }
600}
601
602fn build_stream_end_deltas(
604 tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
605 usage: Option<Usage>,
606 stop_reason: StopReason,
607) -> Vec<StreamDelta> {
608 let mut deltas = Vec::new();
609
610 for (idx, tool) in tool_calls {
612 deltas.push(StreamDelta::ToolUseStart {
613 id: tool.id.clone(),
614 name: tool.name.clone(),
615 block_index: *idx + 1,
616 thought_signature: None,
617 });
618 deltas.push(StreamDelta::ToolInputDelta {
619 id: tool.id.clone(),
620 delta: tool.arguments.clone(),
621 block_index: *idx + 1,
622 });
623 }
624
625 if let Some(u) = usage {
627 deltas.push(StreamDelta::Usage(u));
628 }
629
630 deltas.push(StreamDelta::Done {
632 stop_reason: Some(stop_reason),
633 });
634
635 deltas
636}
637
/// One normalized event extracted from an SSE `data:` payload.
enum SseProcessResult {
    // A piece of assistant text.
    TextDelta(String),
    // A fragment of a (possibly multi-chunk) tool call, keyed by `index`.
    ToolCallUpdate {
        index: usize,
        id: Option<String>,
        name: Option<String>,
        arguments: Option<String>,
    },
    // Token usage reported by the final chunk.
    Usage(Usage),
    // A chunk carried an explicit `finish_reason`.
    Done(StopReason),
    // The literal `[DONE]` terminator (no finish_reason attached).
    Sentinel,
}
656
657fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
659 if data == "[DONE]" {
660 return vec![SseProcessResult::Sentinel];
661 }
662
663 let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
664 return vec![];
665 };
666
667 let mut results = Vec::new();
668
669 if let Some(u) = chunk.usage {
671 results.push(SseProcessResult::Usage(Usage {
672 input_tokens: u.prompt_tokens,
673 output_tokens: u.completion_tokens,
674 cached_input_tokens: u
675 .prompt_tokens_details
676 .as_ref()
677 .map_or(0, |details| details.cached_tokens),
678 }));
679 }
680
681 if let Some(choice) = chunk.choices.into_iter().next() {
683 if let Some(content) = choice.delta.content
685 && !content.is_empty()
686 {
687 results.push(SseProcessResult::TextDelta(content));
688 }
689
690 if let Some(tc_deltas) = choice.delta.tool_calls {
692 for tc in tc_deltas {
693 results.push(SseProcessResult::ToolCallUpdate {
694 index: tc.index,
695 id: tc.id,
696 name: tc.function.as_ref().and_then(|f| f.name.clone()),
697 arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
698 });
699 }
700 }
701
702 if let Some(finish_reason) = choice.finish_reason {
704 results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
705 }
706 }
707
708 results
709}
710
/// Whether the endpoint also needs the legacy `max_tokens` field alongside
/// `max_completion_tokens` (Moonshot/Kimi, Z.AI and MiniMax do).
fn use_max_tokens_alias(base_url: &str) -> bool {
    const ALIAS_HOSTS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];
    ALIAS_HOSTS.iter().any(|host| base_url.contains(host))
}
716
/// Whether to request usage reporting in streamed responses via
/// `stream_options.include_usage` (only sent to the official OpenAI host).
///
/// `DEFAULT_BASE_URL` (`https://api.openai.com/v1`) already contains
/// `api.openai.com`, so the substring test alone covers the default URL; the
/// previous explicit equality check against it was redundant and has been
/// removed.
fn use_stream_usage_options(base_url: &str) -> bool {
    base_url.contains("api.openai.com")
}
720
721fn map_finish_reason(finish_reason: &str) -> StopReason {
722 match finish_reason {
723 "stop" => StopReason::EndTurn,
724 "tool_calls" => StopReason::ToolUse,
725 "length" => StopReason::MaxTokens,
726 "content_filter" | "network_error" => StopReason::StopSequence,
727 "sensitive" => StopReason::Refusal,
728 unknown => {
729 log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
730 StopReason::StopSequence
731 }
732 }
733}
734
735fn build_api_chat_request<'a>(
736 model: &'a str,
737 messages: &'a [ApiMessage],
738 max_tokens: u32,
739 tools: Option<&'a [ApiTool]>,
740 reasoning: Option<ApiReasoning>,
741 include_max_tokens_alias: bool,
742) -> ApiChatRequest<'a> {
743 ApiChatRequest {
744 model,
745 messages,
746 max_completion_tokens: Some(max_tokens),
747 max_tokens: include_max_tokens_alias.then_some(max_tokens),
748 tools,
749 reasoning,
750 }
751}
752
753fn build_api_chat_request_streaming<'a>(
754 model: &'a str,
755 messages: &'a [ApiMessage],
756 max_tokens: u32,
757 tools: Option<&'a [ApiTool]>,
758 reasoning: Option<ApiReasoning>,
759 include_max_tokens_alias: bool,
760 include_stream_usage: bool,
761) -> ApiChatRequestStreaming<'a> {
762 ApiChatRequestStreaming {
763 model,
764 messages,
765 max_completion_tokens: Some(max_tokens),
766 max_tokens: include_max_tokens_alias.then_some(max_tokens),
767 tools,
768 reasoning,
769 stream_options: include_stream_usage.then_some(ApiStreamOptions {
770 include_usage: true,
771 }),
772 stream: true,
773 }
774}
775
776fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
777 thinking
778 .and_then(resolve_reasoning_effort)
779 .map(|effort| ApiReasoning { effort })
780}
781
782const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
783 if let Some(effort) = config.effort {
784 return Some(map_effort(effort));
785 }
786
787 match &config.mode {
788 ThinkingMode::Adaptive => None,
789 ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
790 }
791}
792
793const fn map_effort(effort: Effort) -> ReasoningEffort {
794 match effort {
795 Effort::Low => ReasoningEffort::Low,
796 Effort::Medium => ReasoningEffort::Medium,
797 Effort::High => ReasoningEffort::High,
798 Effort::Max => ReasoningEffort::XHigh,
799 }
800}
801
802const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
803 if budget_tokens <= 4_096 {
804 ReasoningEffort::Low
805 } else if budget_tokens <= 16_384 {
806 ReasoningEffort::Medium
807 } else if budget_tokens <= 32_768 {
808 ReasoningEffort::High
809 } else {
810 ReasoningEffort::XHigh
811 }
812}
813
/// Flattens a `ChatRequest` into the OpenAI chat-completions message list.
///
/// A non-empty system prompt becomes the leading `system` message. Plain-text
/// messages map one-to-one. For block messages: text blocks within one
/// message are joined with newlines, `ToolUse` blocks become `tool_calls` on
/// the enclosing message, and each `ToolResult` block is emitted immediately
/// as a separate `tool`-role message. Thinking, redacted-thinking, image and
/// document blocks are dropped (attachments are routed to the Responses API
/// before this function runs).
fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
    let mut messages = Vec::new();

    if !request.system.is_empty() {
        messages.push(ApiMessage {
            role: ApiRole::System,
            content: Some(request.system.clone()),
            tool_calls: None,
            tool_call_id: None,
        });
    }

    for msg in &request.messages {
        match &msg.content {
            Content::Text(text) => {
                messages.push(ApiMessage {
                    role: match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    },
                    content: Some(text.clone()),
                    tool_calls: None,
                    tool_call_id: None,
                });
            }
            Content::Blocks(blocks) => {
                let mut text_parts = Vec::new();
                let mut tool_calls = Vec::new();

                for block in blocks {
                    match block {
                        ContentBlock::Text { text } => {
                            text_parts.push(text.clone());
                        }
                        // Not representable in the chat-completions schema;
                        // dropped intentionally.
                        ContentBlock::Thinking { .. }
                        | ContentBlock::RedactedThinking { .. }
                        | ContentBlock::Image { .. }
                        | ContentBlock::Document { .. } => {
                        }
                        ContentBlock::ToolUse {
                            id, name, input, ..
                        } => {
                            tool_calls.push(ApiToolCall {
                                id: id.clone(),
                                r#type: "function".to_owned(),
                                function: ApiFunctionCall {
                                    name: name.clone(),
                                    // Unserializable input degrades to "{}"
                                    // rather than failing the whole request.
                                    arguments: serde_json::to_string(input)
                                        .unwrap_or_else(|_| "{}".to_owned()),
                                },
                            });
                        }
                        ContentBlock::ToolResult {
                            tool_use_id,
                            content,
                            ..
                        } => {
                            // NOTE(review): tool messages are pushed during
                            // block iteration, so they precede the aggregate
                            // message built below for the same turn — confirm
                            // the API accepts this ordering.
                            messages.push(ApiMessage {
                                role: ApiRole::Tool,
                                content: Some(content.clone()),
                                tool_calls: None,
                                tool_call_id: Some(tool_use_id.clone()),
                            });
                        }
                    }
                }

                if !text_parts.is_empty() || !tool_calls.is_empty() {
                    let role = match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    };

                    // User messages carrying only tool_calls (no text) are
                    // skipped; only assistant messages may be text-less.
                    if role == ApiRole::Assistant || !text_parts.is_empty() {
                        messages.push(ApiMessage {
                            role,
                            content: if text_parts.is_empty() {
                                None
                            } else {
                                Some(text_parts.join("\n"))
                            },
                            tool_calls: if tool_calls.is_empty() {
                                None
                            } else {
                                Some(tool_calls)
                            },
                            tool_call_id: None,
                        });
                    }
                }
            }
        }
    }

    messages
}
917
918fn convert_tool(t: crate::llm::Tool) -> ApiTool {
919 ApiTool {
920 r#type: "function".to_owned(),
921 function: ApiFunction {
922 name: t.name,
923 description: t.description,
924 parameters: t.input_schema,
925 },
926 }
927}
928
929fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
930 let mut blocks = Vec::new();
931
932 if let Some(content) = &message.content
934 && !content.is_empty()
935 {
936 blocks.push(ContentBlock::Text {
937 text: content.clone(),
938 });
939 }
940
941 if let Some(tool_calls) = &message.tool_calls {
943 for tc in tool_calls {
944 let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
945 .unwrap_or_else(|_| serde_json::json!({}));
946 blocks.push(ContentBlock::ToolUse {
947 id: tc.id.clone(),
948 name: tc.function.name.clone(),
949 input,
950 thought_signature: None,
951 });
952 }
953 }
954
955 blocks
956}
957
/// JSON body of a non-streaming `/chat/completions` request.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    // Modern token-limit field.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    // Legacy alias, emitted only for providers that require it
    // (see `use_max_tokens_alias`).
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
}

/// JSON body of a streaming `/chat/completions` request; identical to
/// `ApiChatRequest` plus the stream flags.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    // Only sent to endpoints known to support it (see
    // `use_stream_usage_options`).
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<ApiStreamOptions>,
    stream: bool,
}

/// `stream_options` object requesting usage in the final stream chunk.
#[derive(Clone, Copy, Serialize)]
struct ApiStreamOptions {
    include_usage: bool,
}

/// Wire-level reasoning effort values (`low`/`medium`/`high`/`xhigh`).
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    #[serde(rename = "xhigh")]
    XHigh,
}

/// `reasoning` object of the request body.
#[derive(Serialize)]
struct ApiReasoning {
    effort: ReasoningEffort,
}
1012
/// One entry of the outgoing `messages` array.
#[derive(Serialize)]
struct ApiMessage {
    role: ApiRole,
    // None only for assistant messages that carry tool_calls without text.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    // Set only on `tool`-role messages, linking back to the originating call.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}

/// Chat-completions message roles.
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    System,
    User,
    Assistant,
    Tool,
}

/// Outgoing tool call attached to an assistant message.
#[derive(Serialize)]
struct ApiToolCall {
    id: String,
    // Always "function".
    r#type: String,
    function: ApiFunctionCall,
}

/// Function name plus JSON-encoded arguments string.
#[derive(Serialize)]
struct ApiFunctionCall {
    name: String,
    arguments: String,
}

/// Outgoing tool definition.
#[derive(Serialize)]
struct ApiTool {
    // Always "function".
    r#type: String,
    function: ApiFunction,
}

/// Function schema of a tool definition.
#[derive(Serialize)]
struct ApiFunction {
    name: String,
    description: String,
    // JSON Schema describing the function parameters.
    parameters: serde_json::Value,
}
1058
/// Top-level non-streaming `/chat/completions` response.
#[derive(Deserialize)]
struct ApiChatResponse {
    id: String,
    choices: Vec<ApiChoice>,
    model: String,
    usage: ApiUsage,
}

/// One response choice; only the first is ever consumed.
#[derive(Deserialize)]
struct ApiChoice {
    message: ApiResponseMessage,
    finish_reason: Option<String>,
}

/// Assistant message of a response choice.
#[derive(Deserialize)]
struct ApiResponseMessage {
    content: Option<String>,
    tool_calls: Option<Vec<ApiResponseToolCall>>,
}

/// Tool call returned by the model.
#[derive(Deserialize)]
struct ApiResponseToolCall {
    id: String,
    function: ApiResponseFunctionCall,
}

/// Function name plus JSON-encoded arguments string of a returned tool call.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    name: String,
    arguments: String,
}

/// Token accounting of a non-streaming response. Counts may arrive as
/// floats from some compatible servers; the custom deserializer accepts
/// both encodings.
#[derive(Deserialize)]
struct ApiUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}

/// Breakdown of prompt tokens; only the cached count is consumed.
#[derive(Deserialize)]
struct ApiPromptTokensDetails {
    #[serde(default, deserialize_with = "deserialize_u32_from_number")]
    cached_tokens: u32,
}
1110
/// Accumulates a tool call streamed across multiple SSE chunks: `id` and
/// `name` are set once, `arguments` grows by fragment concatenation.
struct ToolCallAccumulator {
    id: String,
    name: String,
    arguments: String,
}
1121
/// One parsed SSE chunk of a streaming response.
#[derive(Deserialize)]
struct SseChunk {
    choices: Vec<SseChoice>,
    // Present only in the final chunk when `stream_options.include_usage`
    // was requested.
    #[serde(default)]
    usage: Option<SseUsage>,
}

/// One choice within a streamed chunk; only the first is consumed.
#[derive(Deserialize)]
struct SseChoice {
    delta: SseDelta,
    finish_reason: Option<String>,
}

/// Incremental payload of a streamed choice.
#[derive(Deserialize)]
struct SseDelta {
    content: Option<String>,
    tool_calls: Option<Vec<SseToolCallDelta>>,
}

/// Fragment of a streamed tool call; `index` keys the accumulator.
#[derive(Deserialize)]
struct SseToolCallDelta {
    index: usize,
    id: Option<String>,
    function: Option<SseFunctionDelta>,
}

/// Partial function data of a streamed tool call.
#[derive(Deserialize)]
struct SseFunctionDelta {
    name: Option<String>,
    arguments: Option<String>,
}

/// Token accounting of the final stream chunk; same float-tolerant
/// deserialization as `ApiUsage`.
#[derive(Deserialize)]
struct SseUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1164
1165fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1166where
1167 D: serde::Deserializer<'de>,
1168{
1169 #[derive(Deserialize)]
1170 #[serde(untagged)]
1171 enum NumberLike {
1172 U64(u64),
1173 F64(f64),
1174 }
1175
1176 match NumberLike::deserialize(deserializer)? {
1177 NumberLike::U64(v) => u32::try_from(v)
1178 .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1179 NumberLike::F64(v) => {
1180 if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1181 v.to_string().parse::<u32>().map_err(|e| {
1182 D::Error::custom(format!(
1183 "failed to convert integer-compatible token count {v} to u32: {e}"
1184 ))
1185 })
1186 } else {
1187 Err(D::Error::custom(format!(
1188 "token count must be a non-negative integer-compatible number, got {v}"
1189 )))
1190 }
1191 }
1192 }
1193}
1194
1195#[cfg(test)]
1196mod tests {
1197 use super::*;
1198
1199 #[test]
1204 fn test_new_creates_provider_with_custom_model() {
1205 let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1206
1207 assert_eq!(provider.model(), "custom-model");
1208 assert_eq!(provider.provider(), "openai");
1209 assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1210 }
1211
1212 #[test]
1213 fn test_with_base_url_creates_provider_with_custom_url() {
1214 let provider = OpenAIProvider::with_base_url(
1215 "test-api-key".to_string(),
1216 "llama3".to_string(),
1217 "http://localhost:11434/v1".to_string(),
1218 );
1219
1220 assert_eq!(provider.model(), "llama3");
1221 assert_eq!(provider.base_url, "http://localhost:11434/v1");
1222 }
1223
1224 #[test]
1225 fn test_gpt4o_factory_creates_gpt4o_provider() {
1226 let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1227
1228 assert_eq!(provider.model(), MODEL_GPT4O);
1229 assert_eq!(provider.provider(), "openai");
1230 }
1231
1232 #[test]
1233 fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1234 let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1235
1236 assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1237 assert_eq!(provider.provider(), "openai");
1238 }
1239
1240 #[test]
1241 fn test_gpt52_thinking_factory_creates_provider() {
1242 let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1243
1244 assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1245 assert_eq!(provider.provider(), "openai");
1246 }
1247
1248 #[test]
1249 fn test_gpt54_factory_creates_provider() {
1250 let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1251
1252 assert_eq!(provider.model(), MODEL_GPT54);
1253 assert_eq!(provider.provider(), "openai");
1254 }
1255
1256 #[test]
1257 fn test_gpt53_codex_factory_creates_provider() {
1258 let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1259
1260 assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1261 assert_eq!(provider.provider(), "openai");
1262 }
1263
1264 #[test]
1265 fn test_codex_factory_points_to_latest_codex_model() {
1266 let provider = OpenAIProvider::codex("test-api-key".to_string());
1267
1268 assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1269 assert_eq!(provider.provider(), "openai");
1270 }
1271
1272 #[test]
1273 fn test_gpt5_factory_creates_gpt5_provider() {
1274 let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1275
1276 assert_eq!(provider.model(), MODEL_GPT5);
1277 assert_eq!(provider.provider(), "openai");
1278 }
1279
1280 #[test]
1281 fn test_gpt5_mini_factory_creates_provider() {
1282 let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1283
1284 assert_eq!(provider.model(), MODEL_GPT5_MINI);
1285 assert_eq!(provider.provider(), "openai");
1286 }
1287
1288 #[test]
1289 fn test_o3_factory_creates_o3_provider() {
1290 let provider = OpenAIProvider::o3("test-api-key".to_string());
1291
1292 assert_eq!(provider.model(), MODEL_O3);
1293 assert_eq!(provider.provider(), "openai");
1294 }
1295
1296 #[test]
1297 fn test_o4_mini_factory_creates_o4_mini_provider() {
1298 let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1299
1300 assert_eq!(provider.model(), MODEL_O4_MINI);
1301 assert_eq!(provider.provider(), "openai");
1302 }
1303
1304 #[test]
1305 fn test_o1_factory_creates_o1_provider() {
1306 let provider = OpenAIProvider::o1("test-api-key".to_string());
1307
1308 assert_eq!(provider.model(), MODEL_O1);
1309 assert_eq!(provider.provider(), "openai");
1310 }
1311
1312 #[test]
1313 fn test_gpt41_factory_creates_gpt41_provider() {
1314 let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1315
1316 assert_eq!(provider.model(), MODEL_GPT41);
1317 assert_eq!(provider.provider(), "openai");
1318 }
1319
1320 #[test]
1321 fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1322 let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1323
1324 assert_eq!(provider.model(), "kimi-custom");
1325 assert_eq!(provider.base_url, BASE_URL_KIMI);
1326 assert_eq!(provider.provider(), "openai");
1327 }
1328
1329 #[test]
1330 fn test_kimi_k2_5_factory_creates_provider() {
1331 let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1332
1333 assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1334 assert_eq!(provider.base_url, BASE_URL_KIMI);
1335 assert_eq!(provider.provider(), "openai");
1336 }
1337
1338 #[test]
1339 fn test_kimi_k2_thinking_factory_creates_provider() {
1340 let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1341
1342 assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1343 assert_eq!(provider.base_url, BASE_URL_KIMI);
1344 assert_eq!(provider.provider(), "openai");
1345 }
1346
1347 #[test]
1348 fn test_zai_factory_creates_provider_with_zai_base_url() {
1349 let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());
1350
1351 assert_eq!(provider.model(), "glm-custom");
1352 assert_eq!(provider.base_url, BASE_URL_ZAI);
1353 assert_eq!(provider.provider(), "openai");
1354 }
1355
1356 #[test]
1357 fn test_zai_glm5_factory_creates_provider() {
1358 let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());
1359
1360 assert_eq!(provider.model(), MODEL_ZAI_GLM5);
1361 assert_eq!(provider.base_url, BASE_URL_ZAI);
1362 assert_eq!(provider.provider(), "openai");
1363 }
1364
1365 #[test]
1366 fn test_minimax_factory_creates_provider_with_minimax_base_url() {
1367 let provider =
1368 OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());
1369
1370 assert_eq!(provider.model(), "minimax-custom");
1371 assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1372 assert_eq!(provider.provider(), "openai");
1373 }
1374
1375 #[test]
1376 fn test_minimax_m2_5_factory_creates_provider() {
1377 let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());
1378
1379 assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
1380 assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1381 assert_eq!(provider.provider(), "openai");
1382 }
1383
    /// Pins every public model-id and base-URL constant to its exact wire
    /// value, so an accidental edit to a constant is caught at test time
    /// rather than as a confusing API error.
    #[test]
    fn test_model_constants_have_expected_values() {
        assert_eq!(MODEL_GPT54, "gpt-5.4");
        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
        assert_eq!(MODEL_GPT5, "gpt-5");
        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
        assert_eq!(MODEL_O3, "o3");
        assert_eq!(MODEL_O3_MINI, "o3-mini");
        assert_eq!(MODEL_O4_MINI, "o4-mini");
        assert_eq!(MODEL_O1, "o1");
        assert_eq!(MODEL_O1_MINI, "o1-mini");
        assert_eq!(MODEL_GPT41, "gpt-4.1");
        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
        assert_eq!(MODEL_GPT4O, "gpt-4o");
        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
    }
1424
1425 #[test]
1430 fn test_provider_is_cloneable() {
1431 let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
1432 let cloned = provider.clone();
1433
1434 assert_eq!(provider.model(), cloned.model());
1435 assert_eq!(provider.provider(), cloned.provider());
1436 assert_eq!(provider.base_url, cloned.base_url);
1437 }
1438
1439 #[test]
1444 fn test_api_role_serialization() {
1445 let system_role = ApiRole::System;
1446 let user_role = ApiRole::User;
1447 let assistant_role = ApiRole::Assistant;
1448 let tool_role = ApiRole::Tool;
1449
1450 assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
1451 assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
1452 assert_eq!(
1453 serde_json::to_string(&assistant_role).unwrap(),
1454 "\"assistant\""
1455 );
1456 assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
1457 }
1458
    /// A plain user message serializes role + content and omits the optional
    /// tool fields entirely (they are `skip_serializing_if`-style optional —
    /// presence would confuse strict vendors).
    #[test]
    fn test_api_message_serialization_simple() {
        let message = ApiMessage {
            role: ApiRole::User,
            content: Some("Hello, world!".to_string()),
            tool_calls: None,
            tool_call_id: None,
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"user\""));
        assert!(json.contains("\"content\":\"Hello, world!\""));
        // None fields must not appear as JSON keys at all.
        assert!(!json.contains("tool_calls"));
        assert!(!json.contains("tool_call_id"));
    }

    /// An assistant message carrying tool calls serializes the nested
    /// id/type/function structure the OpenAI wire format expects.
    #[test]
    fn test_api_message_serialization_with_tool_calls() {
        let message = ApiMessage {
            role: ApiRole::Assistant,
            content: Some("Let me help.".to_string()),
            tool_calls: Some(vec![ApiToolCall {
                id: "call_123".to_string(),
                r#type: "function".to_string(),
                function: ApiFunctionCall {
                    name: "read_file".to_string(),
                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
                },
            }]),
            tool_call_id: None,
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"assistant\""));
        assert!(json.contains("\"tool_calls\""));
        assert!(json.contains("\"id\":\"call_123\""));
        // `r#type` must serialize under the plain key "type".
        assert!(json.contains("\"type\":\"function\""));
        assert!(json.contains("\"name\":\"read_file\""));
    }

    /// A tool-result message pairs its content with the originating
    /// `tool_call_id` so the model can correlate results to calls.
    #[test]
    fn test_api_tool_message_serialization() {
        let message = ApiMessage {
            role: ApiRole::Tool,
            content: Some("File contents here".to_string()),
            tool_calls: None,
            tool_call_id: Some("call_123".to_string()),
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"tool\""));
        assert!(json.contains("\"tool_call_id\":\"call_123\""));
        assert!(json.contains("\"content\":\"File contents here\""));
    }
1514
    /// A tool definition serializes as `{"type":"function","function":{...}}`
    /// with name, description, and a JSON-Schema `parameters` object.
    #[test]
    fn test_api_tool_serialization() {
        let tool = ApiTool {
            r#type: "function".to_string(),
            function: ApiFunction {
                name: "test_tool".to_string(),
                description: "A test tool".to_string(),
                parameters: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "arg": {"type": "string"}
                    }
                }),
            },
        };

        let json = serde_json::to_string(&tool).unwrap();
        assert!(json.contains("\"type\":\"function\""));
        assert!(json.contains("\"name\":\"test_tool\""));
        assert!(json.contains("\"description\":\"A test tool\""));
        assert!(json.contains("\"parameters\""));
    }
1537
    /// A minimal chat-completions response (text content, "stop" finish
    /// reason, usage counts) deserializes field-for-field.
    #[test]
    fn test_api_response_deserialization() {
        let json = r#"{
            "id": "chatcmpl-123",
            "choices": [
                {
                    "message": {
                        "content": "Hello!"
                    },
                    "finish_reason": "stop"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(response.id, "chatcmpl-123");
        assert_eq!(response.model, "gpt-4o");
        assert_eq!(response.usage.prompt_tokens, 100);
        assert_eq!(response.usage.completion_tokens, 50);
        assert_eq!(response.choices.len(), 1);
        assert_eq!(
            response.choices[0].message.content,
            Some("Hello!".to_string())
        );
    }

    /// A response with `content: null` and a populated `tool_calls` array
    /// (the shape the API returns when the model invokes a tool) must
    /// deserialize with the call id and function name intact.
    #[test]
    fn test_api_response_with_tool_calls_deserialization() {
        let json = r#"{
            "id": "chatcmpl-456",
            "choices": [
                {
                    "message": {
                        "content": null,
                        "tool_calls": [
                            {
                                "id": "call_abc",
                                "type": "function",
                                "function": {
                                    "name": "read_file",
                                    "arguments": "{\"path\": \"test.txt\"}"
                                }
                            }
                        ]
                    },
                    "finish_reason": "tool_calls"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 150,
                "completion_tokens": 30
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].id, "call_abc");
        assert_eq!(tool_calls[0].function.name, "read_file");
    }
1608
    /// Vendor-specific finish reasons the enum doesn't know about must still
    /// deserialize (the field is a raw string) and map to the fallback
    /// `StopReason::StopSequence` rather than failing the whole response.
    #[test]
    fn test_api_response_with_unknown_finish_reason_deserialization() {
        let json = r#"{
            "id": "chatcmpl-789",
            "choices": [
                {
                    "message": {
                        "content": "ok"
                    },
                    "finish_reason": "vendor_custom_reason"
                }
            ],
            "model": "glm-5",
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": 5
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(
            response.choices[0].finish_reason.as_deref(),
            Some("vendor_custom_reason")
        );
        assert_eq!(
            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
            StopReason::StopSequence
        );
    }

    /// Exhaustive spot-check of `map_finish_reason`: standard OpenAI values
    /// map to their canonical `StopReason`s; vendor values ("sensitive",
    /// "network_error") and unknowns fall back deterministically.
    #[test]
    fn test_map_finish_reason_covers_vendor_specific_values() {
        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
        assert_eq!(
            map_finish_reason("content_filter"),
            StopReason::StopSequence
        );
        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
        assert_eq!(
            map_finish_reason("some_new_reason"),
            StopReason::StopSequence
        );
    }
1655
    /// A non-empty system prompt becomes a leading `system`-role message,
    /// followed by the conversation messages in order.
    #[test]
    fn test_build_api_messages_with_system() {
        let request = ChatRequest {
            system: "You are helpful.".to_string(),
            messages: vec![crate::llm::Message::user("Hello")],
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: true,
            session_id: None,
            cached_content: None,
            thinking: None,
        };

        let api_messages = build_api_messages(&request);
        assert_eq!(api_messages.len(), 2);
        assert_eq!(api_messages[0].role, ApiRole::System);
        assert_eq!(
            api_messages[0].content,
            Some("You are helpful.".to_string())
        );
        assert_eq!(api_messages[1].role, ApiRole::User);
        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
    }

    /// An empty system prompt must NOT produce an empty system message —
    /// only the user message survives.
    #[test]
    fn test_build_api_messages_empty_system() {
        let request = ChatRequest {
            system: String::new(),
            messages: vec![crate::llm::Message::user("Hello")],
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: true,
            session_id: None,
            cached_content: None,
            thinking: None,
        };

        let api_messages = build_api_messages(&request);
        assert_eq!(api_messages.len(), 1);
        assert_eq!(api_messages[0].role, ApiRole::User);
    }
1701
    /// `convert_tool` wraps an internal `Tool` in the OpenAI function-tool
    /// envelope (`type: "function"`) while carrying over name/description.
    #[test]
    fn test_convert_tool() {
        let tool = crate::llm::Tool {
            name: "test_tool".to_string(),
            description: "A test tool".to_string(),
            input_schema: serde_json::json!({"type": "object"}),
        };

        let api_tool = convert_tool(tool);
        assert_eq!(api_tool.r#type, "function");
        assert_eq!(api_tool.function.name, "test_tool");
        assert_eq!(api_tool.function.description, "A test tool");
    }

    /// Text-only responses convert to a single `ContentBlock::Text`.
    #[test]
    fn test_build_content_blocks_text_only() {
        let message = ApiResponseMessage {
            content: Some("Hello!".to_string()),
            tool_calls: None,
        };

        let blocks = build_content_blocks(&message);
        assert_eq!(blocks.len(), 1);
        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
    }

    /// Responses mixing text and tool calls produce the text block first,
    /// then one `ToolUse` block per call (ordering matters downstream).
    #[test]
    fn test_build_content_blocks_with_tool_calls() {
        let message = ApiResponseMessage {
            content: Some("Let me help.".to_string()),
            tool_calls: Some(vec![ApiResponseToolCall {
                id: "call_123".to_string(),
                function: ApiResponseFunctionCall {
                    name: "read_file".to_string(),
                    arguments: "{\"path\": \"test.txt\"}".to_string(),
                },
            }]),
        };

        let blocks = build_content_blocks(&message);
        assert_eq!(blocks.len(), 2);
        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
        assert!(
            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
        );
    }
1748
    /// A plain text-delta SSE chunk deserializes with content set and no
    /// finish reason.
    #[test]
    fn test_sse_chunk_text_delta_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {
                    "content": "Hello"
                },
                "finish_reason": null
            }]
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        assert_eq!(chunk.choices.len(), 1);
        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
        assert!(chunk.choices[0].finish_reason.is_none());
    }

    /// The first chunk of a streamed tool call carries index, id, and the
    /// function name with (so far) empty arguments.
    #[test]
    fn test_sse_chunk_tool_call_delta_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {
                    "tool_calls": [{
                        "index": 0,
                        "id": "call_abc",
                        "function": {
                            "name": "read_file",
                            "arguments": ""
                        }
                    }]
                },
                "finish_reason": null
            }]
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].index, 0);
        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
        assert_eq!(
            tool_calls[0].function.as_ref().unwrap().name,
            Some("read_file".to_string())
        );
    }

    /// Follow-up tool-call chunks omit the id and stream only an arguments
    /// fragment — both fields must be independently optional.
    #[test]
    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {
                    "tool_calls": [{
                        "index": 0,
                        "function": {
                            "arguments": "{\"path\":"
                        }
                    }]
                },
                "finish_reason": null
            }]
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
        assert_eq!(tool_calls[0].id, None);
        assert_eq!(
            tool_calls[0].function.as_ref().unwrap().arguments,
            Some("{\"path\":".to_string())
        );
    }

    /// The terminal chunk carries an empty delta plus the finish reason.
    #[test]
    fn test_sse_chunk_with_finish_reason_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {},
                "finish_reason": "stop"
            }]
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
    }
1836
    /// When `stream_options.include_usage` is set, the final SSE chunk
    /// carries a top-level `usage` object that must deserialize.
    #[test]
    fn test_sse_chunk_with_usage_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {},
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50
            }
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        let usage = chunk.usage.unwrap();
        assert_eq!(usage.prompt_tokens, 100);
        assert_eq!(usage.completion_tokens, 50);
    }

    /// Some vendors emit token counts as floats (e.g. 100.0); whole-number
    /// floats must still parse into integer token counts.
    #[test]
    fn test_sse_chunk_with_float_usage_deserialization() {
        let json = r#"{
            "choices": [{
                "delta": {},
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 100.0,
                "completion_tokens": 50.0
            }
        }"#;

        let chunk: SseChunk = serde_json::from_str(json).unwrap();
        let usage = chunk.usage.unwrap();
        assert_eq!(usage.prompt_tokens, 100);
        assert_eq!(usage.completion_tokens, 50);
    }
1874
    /// `ApiUsage` accepts integer-valued floats (42.0) alongside plain
    /// integers — mirrors the lenient number handling in the SSE tests.
    #[test]
    fn test_api_usage_deserializes_integer_compatible_numbers() {
        let json = r#"{
            "prompt_tokens": 42.0,
            "completion_tokens": 7
        }"#;

        let usage: ApiUsage = serde_json::from_str(json).unwrap();
        assert_eq!(usage.prompt_tokens, 42);
        assert_eq!(usage.completion_tokens, 7);
    }

    /// The optional `prompt_tokens_details.cached_tokens` field (prompt
    /// caching) deserializes when present.
    #[test]
    fn test_api_usage_deserializes_cached_tokens() {
        let json = r#"{
            "prompt_tokens": 42,
            "completion_tokens": 7,
            "prompt_tokens_details": {
                "cached_tokens": 10
            }
        }"#;

        let usage: ApiUsage = serde_json::from_str(json).unwrap();
        assert_eq!(usage.prompt_tokens, 42);
        assert_eq!(usage.completion_tokens, 7);
        assert_eq!(usage.prompt_tokens_details.unwrap().cached_tokens, 10);
    }
1902
1903 #[test]
1904 fn test_api_usage_rejects_fractional_numbers() {
1905 let json = r#"{
1906 "prompt_tokens": 42.5,
1907 "completion_tokens": 7
1908 }"#;
1909
1910 let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
1911 assert!(usage.is_err());
1912 }
1913
1914 #[test]
1915 fn test_use_max_tokens_alias_for_vendor_urls() {
1916 assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
1917 assert!(use_max_tokens_alias(BASE_URL_KIMI));
1918 assert!(use_max_tokens_alias(BASE_URL_ZAI));
1919 assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
1920 }
1921
1922 #[test]
1923 fn test_requires_responses_api_only_for_legacy_codex_model() {
1924 assert!(requires_responses_api(MODEL_GPT52_CODEX));
1925 assert!(!requires_responses_api(MODEL_GPT53_CODEX));
1926 assert!(!requires_responses_api(MODEL_GPT54));
1927 }
1928
    /// Agentic requests (tool-bearing) on the official OpenAI endpoint route
    /// to the Responses API, while the same request against a third-party
    /// base URL stays on chat completions.
    #[test]
    fn test_should_use_responses_api_for_official_agentic_requests() {
        let request = ChatRequest {
            system: String::new(),
            messages: vec![crate::llm::Message::user("Hello")],
            tools: Some(vec![crate::llm::Tool {
                name: "read_file".to_string(),
                description: "Read a file".to_string(),
                input_schema: serde_json::json!({"type": "object"}),
            }]),
            max_tokens: 1024,
            max_tokens_explicit: true,
            session_id: Some("thread-1".to_string()),
            cached_content: None,
            thinking: None,
        };

        // Official endpoint + tools => Responses API.
        assert!(should_use_responses_api(
            DEFAULT_BASE_URL,
            MODEL_GPT54,
            &request
        ));
        // Vendor endpoint => chat completions even for agentic requests.
        assert!(!should_use_responses_api(
            BASE_URL_KIMI,
            MODEL_GPT54,
            &request
        ));
    }
1957
    /// A large explicit thinking budget (40k tokens) maps onto the top
    /// reasoning-effort bucket.
    #[test]
    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
    }

    /// An explicitly requested effort level passes through unchanged.
    #[test]
    fn test_build_api_reasoning_uses_explicit_effort() {
        let reasoning =
            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
        assert!(matches!(reasoning.effort, ReasoningEffort::High));
    }

    /// Adaptive thinking with no explicit effort emits no `reasoning` field,
    /// letting the API pick its own default.
    #[test]
    fn test_build_api_reasoning_omits_adaptive_without_effort() {
        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
    }
1975
    /// Adaptive thinking has no OpenAI equivalent: validation must reject it
    /// with a descriptive error even on a reasoning-capable model.
    #[test]
    fn test_openai_rejects_adaptive_thinking() {
        let provider = OpenAIProvider::gpt54("test-key".to_string());
        let error = provider
            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
            .unwrap_err();
        assert!(
            error
                .to_string()
                .contains("adaptive thinking is not supported")
        );
    }

    /// Non-reasoning models (gpt-4o) must reject any thinking budget outright.
    #[test]
    fn test_openai_non_reasoning_models_reject_thinking() {
        let provider = OpenAIProvider::gpt4o("test-key".to_string());
        let error = provider
            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
            .unwrap_err();
        assert!(error.to_string().contains("thinking is not supported"));
    }
1997
    /// Against the official endpoint only `max_completion_tokens` is sent;
    /// the legacy `max_tokens` key must be absent from the payload.
    #[test]
    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
        let messages = vec![ApiMessage {
            role: ApiRole::User,
            content: Some("Hello".to_string()),
            tool_calls: None,
            tool_call_id: None,
        }];

        let request = ApiChatRequest {
            model: "gpt-4o",
            messages: &messages,
            max_completion_tokens: Some(1024),
            max_tokens: None,
            tools: None,
            reasoning: None,
        };

        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("\"max_completion_tokens\":1024"));
        assert!(!json.contains("\"max_tokens\""));
    }

    /// Vendor endpoints get both keys: `max_completion_tokens` plus the
    /// legacy `max_tokens` alias with the same value.
    #[test]
    fn test_request_serialization_with_max_tokens_alias() {
        let messages = vec![ApiMessage {
            role: ApiRole::User,
            content: Some("Hello".to_string()),
            tool_calls: None,
            tool_call_id: None,
        }];

        let request = ApiChatRequest {
            model: "glm-5",
            messages: &messages,
            max_completion_tokens: Some(1024),
            max_tokens: Some(1024),
            tools: None,
            reasoning: None,
        };

        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("\"max_completion_tokens\":1024"));
        assert!(json.contains("\"max_tokens\":1024"));
    }

    /// Default streaming request: `stream: true`, usage reporting enabled via
    /// `stream_options`, and no legacy `max_tokens` key.
    #[test]
    fn test_streaming_request_serialization_openai_default() {
        let messages = vec![ApiMessage {
            role: ApiRole::User,
            content: Some("Hello".to_string()),
            tool_calls: None,
            tool_call_id: None,
        }];

        let request = ApiChatRequestStreaming {
            model: "gpt-4o",
            messages: &messages,
            max_completion_tokens: Some(1024),
            max_tokens: None,
            tools: None,
            reasoning: None,
            stream_options: Some(ApiStreamOptions {
                include_usage: true,
            }),
            stream: true,
        };

        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("\"stream\":true"));
        assert!(json.contains("\"model\":\"gpt-4o\""));
        assert!(json.contains("\"max_completion_tokens\":1024"));
        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
        assert!(!json.contains("\"max_tokens\""));
    }

    /// Vendor streaming request: both token-limit keys present and the
    /// `stream_options` key omitted entirely when `None`.
    #[test]
    fn test_streaming_request_serialization_with_max_tokens_alias() {
        let messages = vec![ApiMessage {
            role: ApiRole::User,
            content: Some("Hello".to_string()),
            tool_calls: None,
            tool_call_id: None,
        }];

        let request = ApiChatRequestStreaming {
            model: "kimi-k2-thinking",
            messages: &messages,
            max_completion_tokens: Some(1024),
            max_tokens: Some(1024),
            tools: None,
            reasoning: None,
            stream_options: None,
            stream: true,
        };

        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("\"max_completion_tokens\":1024"));
        assert!(json.contains("\"max_tokens\":1024"));
        assert!(!json.contains("\"stream_options\""));
    }

    /// A populated `reasoning` config serializes as a nested object with a
    /// lowercase effort value.
    #[test]
    fn test_request_serialization_includes_reasoning_when_present() {
        let messages = vec![ApiMessage {
            role: ApiRole::User,
            content: Some("Hello".to_string()),
            tool_calls: None,
            tool_call_id: None,
        }];

        let request = ApiChatRequest {
            model: MODEL_GPT54,
            messages: &messages,
            max_completion_tokens: Some(1024),
            max_tokens: None,
            tools: None,
            reasoning: Some(ApiReasoning {
                effort: ReasoningEffort::High,
            }),
        };

        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
    }
2123}