// agent_sdk/providers/openai.rs
1//! `OpenAI` API provider implementation.
2//!
3//! This module provides an implementation of `LlmProvider` for the `OpenAI`
4//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
5//! via the `with_base_url` constructor.
6//!
7//! Legacy models that require the Responses API (like `gpt-5.2-codex`) are automatically
8//! routed to the correct endpoint.
9
10use crate::llm::{
11    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, LlmProvider, StopReason,
12    StreamBox, StreamDelta, ThinkingConfig, ThinkingMode, Usage,
13};
14use anyhow::Result;
15use async_trait::async_trait;
16use futures::StreamExt;
17use reqwest::StatusCode;
18use serde::de::Error as _;
19use serde::{Deserialize, Serialize};
20
21use super::openai_responses::OpenAIResponsesProvider;
22
/// Default API root for first-party `OpenAI` requests; overridable via `with_base_url`.
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";

/// Check if a model must be served by the Responses API instead of the
/// Chat Completions endpoint (currently only `gpt-5.2-codex`).
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}

// GPT-5.4 series
pub const MODEL_GPT54: &str = "gpt-5.4";

// GPT-5.3 Codex series
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

// GPT-5.2 series
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
/// Routed to the Responses API; see `requires_responses_api`.
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

// GPT-5 series (400k context)
pub const MODEL_GPT5: &str = "gpt-5";
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

// o-series reasoning models
pub const MODEL_O3: &str = "o3";
pub const MODEL_O3_MINI: &str = "o3-mini";
pub const MODEL_O4_MINI: &str = "o4-mini";
pub const MODEL_O1: &str = "o1";
pub const MODEL_O1_MINI: &str = "o1-mini";

// GPT-4.1 series (improved instruction following, 1M context)
pub const MODEL_GPT41: &str = "gpt-4.1";
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

// GPT-4o series
pub const MODEL_GPT4O: &str = "gpt-4o";
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// OpenAI-compatible vendor defaults
/// Moonshot (KIMI) OpenAI-compatible endpoint.
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
/// z.ai OpenAI-compatible endpoint.
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
/// `MiniMax` OpenAI-compatible endpoint.
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
pub const MODEL_ZAI_GLM5: &str = "glm-5";
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";

/// `OpenAI` LLM provider using the Chat Completions API.
///
/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
/// via the `with_base_url` constructor.
#[derive(Clone)]
pub struct OpenAIProvider {
    /// Shared HTTP client, reused across requests.
    client: reqwest::Client,
    /// Bearer token sent in the `Authorization` header.
    api_key: String,
    /// Model identifier sent with every request.
    model: String,
    /// API root; `DEFAULT_BASE_URL` unless overridden for a compatible vendor.
    base_url: String,
    /// Provider-owned thinking configuration, if any.
    thinking: Option<ThinkingConfig>,
}

85impl OpenAIProvider {
86    /// Create a new `OpenAI` provider with the specified API key and model.
87    #[must_use]
88    pub fn new(api_key: String, model: String) -> Self {
89        Self {
90            client: reqwest::Client::new(),
91            api_key,
92            model,
93            base_url: DEFAULT_BASE_URL.to_owned(),
94            thinking: None,
95        }
96    }
97
98    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
99    #[must_use]
100    pub fn with_base_url(api_key: String, model: String, base_url: String) -> Self {
101        Self {
102            client: reqwest::Client::new(),
103            api_key,
104            model,
105            base_url,
106            thinking: None,
107        }
108    }
109
110    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
111    #[must_use]
112    pub fn kimi(api_key: String, model: String) -> Self {
113        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
114    }
115
116    /// Create a provider using KIMI K2.5 (default KIMI model).
117    #[must_use]
118    pub fn kimi_k2_5(api_key: String) -> Self {
119        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
120    }
121
122    /// Create a provider using KIMI K2 Thinking.
123    #[must_use]
124    pub fn kimi_k2_thinking(api_key: String) -> Self {
125        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
126    }
127
128    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
129    #[must_use]
130    pub fn zai(api_key: String, model: String) -> Self {
131        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
132    }
133
134    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
135    #[must_use]
136    pub fn zai_glm5(api_key: String) -> Self {
137        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
138    }
139
140    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
141    #[must_use]
142    pub fn minimax(api_key: String, model: String) -> Self {
143        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
144    }
145
146    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
147    #[must_use]
148    pub fn minimax_m2_5(api_key: String) -> Self {
149        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
150    }
151
152    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
153    #[must_use]
154    pub fn gpt52_instant(api_key: String) -> Self {
155        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
156    }
157
158    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
159    #[must_use]
160    pub fn gpt54(api_key: String) -> Self {
161        Self::new(api_key, MODEL_GPT54.to_owned())
162    }
163
164    /// Create a provider using GPT-5.3 Codex (latest codex model).
165    #[must_use]
166    pub fn gpt53_codex(api_key: String) -> Self {
167        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
168    }
169
170    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
171    #[must_use]
172    pub fn gpt52_thinking(api_key: String) -> Self {
173        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
174    }
175
176    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
177    #[must_use]
178    pub fn gpt52_pro(api_key: String) -> Self {
179        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
180    }
181
182    /// Create a provider using the latest Codex model.
183    #[must_use]
184    pub fn codex(api_key: String) -> Self {
185        Self::gpt53_codex(api_key)
186    }
187
188    /// Create a provider using GPT-5 (400k context, coding and reasoning).
189    #[must_use]
190    pub fn gpt5(api_key: String) -> Self {
191        Self::new(api_key, MODEL_GPT5.to_owned())
192    }
193
194    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
195    #[must_use]
196    pub fn gpt5_mini(api_key: String) -> Self {
197        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
198    }
199
200    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
201    #[must_use]
202    pub fn gpt5_nano(api_key: String) -> Self {
203        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
204    }
205
206    /// Create a provider using o3 (most intelligent reasoning model).
207    #[must_use]
208    pub fn o3(api_key: String) -> Self {
209        Self::new(api_key, MODEL_O3.to_owned())
210    }
211
212    /// Create a provider using o3-mini (smaller o3 variant).
213    #[must_use]
214    pub fn o3_mini(api_key: String) -> Self {
215        Self::new(api_key, MODEL_O3_MINI.to_owned())
216    }
217
218    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
219    #[must_use]
220    pub fn o4_mini(api_key: String) -> Self {
221        Self::new(api_key, MODEL_O4_MINI.to_owned())
222    }
223
224    /// Create a provider using o1 (reasoning model).
225    #[must_use]
226    pub fn o1(api_key: String) -> Self {
227        Self::new(api_key, MODEL_O1.to_owned())
228    }
229
230    /// Create a provider using o1-mini (fast reasoning model).
231    #[must_use]
232    pub fn o1_mini(api_key: String) -> Self {
233        Self::new(api_key, MODEL_O1_MINI.to_owned())
234    }
235
236    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
237    #[must_use]
238    pub fn gpt41(api_key: String) -> Self {
239        Self::new(api_key, MODEL_GPT41.to_owned())
240    }
241
242    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
243    #[must_use]
244    pub fn gpt41_mini(api_key: String) -> Self {
245        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
246    }
247
248    /// Create a provider using GPT-4o.
249    #[must_use]
250    pub fn gpt4o(api_key: String) -> Self {
251        Self::new(api_key, MODEL_GPT4O.to_owned())
252    }
253
254    /// Create a provider using GPT-4o-mini (fast and cost-effective).
255    #[must_use]
256    pub fn gpt4o_mini(api_key: String) -> Self {
257        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
258    }
259
260    /// Set the provider-owned thinking configuration for this model.
261    #[must_use]
262    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
263        self.thinking = Some(thinking);
264        self
265    }
266}
267
268#[async_trait]
269impl LlmProvider for OpenAIProvider {
270    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
271        // Route to Responses API for models that require it (e.g., gpt-5.2-codex)
272        if requires_responses_api(&self.model) {
273            let mut responses_provider = OpenAIResponsesProvider::with_base_url(
274                self.api_key.clone(),
275                self.model.clone(),
276                self.base_url.clone(),
277            );
278            if let Some(thinking) = self.thinking.clone() {
279                responses_provider = responses_provider.with_thinking(thinking);
280            }
281            return responses_provider.chat(request).await;
282        }
283
284        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
285            Ok(thinking) => thinking,
286            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
287        };
288        let reasoning = build_api_reasoning(thinking_config.as_ref());
289        let messages = build_api_messages(&request);
290        let tools: Option<Vec<ApiTool>> = request
291            .tools
292            .map(|ts| ts.into_iter().map(convert_tool).collect());
293
294        let api_request = build_api_chat_request(
295            &self.model,
296            &messages,
297            request.max_tokens,
298            tools.as_deref(),
299            reasoning,
300            use_max_tokens_alias(&self.base_url),
301        );
302
303        log::debug!(
304            "OpenAI LLM request model={} max_tokens={}",
305            self.model,
306            request.max_tokens
307        );
308
309        let response = self
310            .client
311            .post(format!("{}/chat/completions", self.base_url))
312            .header("Content-Type", "application/json")
313            .header("Authorization", format!("Bearer {}", self.api_key))
314            .json(&api_request)
315            .send()
316            .await
317            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
318
319        let status = response.status();
320        let bytes = response
321            .bytes()
322            .await
323            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
324
325        log::debug!(
326            "OpenAI LLM response status={} body_len={}",
327            status,
328            bytes.len()
329        );
330
331        if status == StatusCode::TOO_MANY_REQUESTS {
332            return Ok(ChatOutcome::RateLimited);
333        }
334
335        if status.is_server_error() {
336            let body = String::from_utf8_lossy(&bytes);
337            log::error!("OpenAI server error status={status} body={body}");
338            return Ok(ChatOutcome::ServerError(body.into_owned()));
339        }
340
341        if status.is_client_error() {
342            let body = String::from_utf8_lossy(&bytes);
343            log::warn!("OpenAI client error status={status} body={body}");
344            return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
345        }
346
347        let api_response: ApiChatResponse = serde_json::from_slice(&bytes)
348            .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
349
350        let choice = api_response
351            .choices
352            .into_iter()
353            .next()
354            .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
355
356        let content = build_content_blocks(&choice.message);
357
358        let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
359
360        Ok(ChatOutcome::Success(ChatResponse {
361            id: api_response.id,
362            content,
363            model: api_response.model,
364            stop_reason,
365            usage: Usage {
366                input_tokens: api_response.usage.prompt_tokens,
367                output_tokens: api_response.usage.completion_tokens,
368            },
369        }))
370    }
371
372    #[allow(clippy::too_many_lines)]
373    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
374        // Route to Responses API for models that require it (e.g., gpt-5.2-codex)
375        if requires_responses_api(&self.model) {
376            let api_key = self.api_key.clone();
377            let model = self.model.clone();
378            let base_url = self.base_url.clone();
379            let thinking = self.thinking.clone();
380            return Box::pin(async_stream::stream! {
381                let mut responses_provider =
382                    OpenAIResponsesProvider::with_base_url(api_key, model, base_url);
383                if let Some(thinking) = thinking {
384                    responses_provider = responses_provider.with_thinking(thinking);
385                }
386                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
387                while let Some(item) = futures::StreamExt::next(&mut stream).await {
388                    yield item;
389                }
390            });
391        }
392
393        Box::pin(async_stream::stream! {
394            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
395                Ok(thinking) => thinking,
396                Err(error) => {
397                    yield Ok(StreamDelta::Error {
398                        message: error.to_string(),
399                        recoverable: false,
400                    });
401                    return;
402                }
403            };
404            let reasoning = build_api_reasoning(thinking_config.as_ref());
405            let messages = build_api_messages(&request);
406            let tools: Option<Vec<ApiTool>> = request
407                .tools
408                .map(|ts| ts.into_iter().map(convert_tool).collect());
409
410            let api_request = build_api_chat_request_streaming(
411                &self.model,
412                &messages,
413                request.max_tokens,
414                tools.as_deref(),
415                reasoning,
416                use_max_tokens_alias(&self.base_url),
417            );
418
419            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
420
421            let Ok(response) = self.client
422                .post(format!("{}/chat/completions", self.base_url))
423                .header("Content-Type", "application/json")
424                .header("Authorization", format!("Bearer {}", self.api_key))
425                .json(&api_request)
426                .send()
427                .await
428            else {
429                yield Err(anyhow::anyhow!("request failed"));
430                return;
431            };
432
433            let status = response.status();
434
435            if !status.is_success() {
436                let body = response.text().await.unwrap_or_default();
437                let (recoverable, level) = if status == StatusCode::TOO_MANY_REQUESTS {
438                    (true, "rate_limit")
439                } else if status.is_server_error() {
440                    (true, "server_error")
441                } else {
442                    (false, "client_error")
443                };
444                log::warn!("OpenAI error status={status} body={body} kind={level}");
445                yield Ok(StreamDelta::Error { message: body, recoverable });
446                return;
447            }
448
449            // Track tool call state across deltas
450            let mut tool_calls: std::collections::HashMap<usize, ToolCallAccumulator> =
451                std::collections::HashMap::new();
452            let mut usage: Option<Usage> = None;
453            let mut buffer = String::new();
454            let mut stream = response.bytes_stream();
455
456            while let Some(chunk_result) = stream.next().await {
457                let Ok(chunk) = chunk_result else {
458                    yield Err(anyhow::anyhow!("stream error: {}", chunk_result.unwrap_err()));
459                    return;
460                };
461                buffer.push_str(&String::from_utf8_lossy(&chunk));
462
463                while let Some(pos) = buffer.find('\n') {
464                    let line = buffer[..pos].trim().to_string();
465                    buffer = buffer[pos + 1..].to_string();
466                    if line.is_empty() { continue; }
467                    let Some(data) = line.strip_prefix("data: ") else { continue; };
468
469                    for result in process_sse_data(data) {
470                        match result {
471                            SseProcessResult::TextDelta(c) => yield Ok(StreamDelta::TextDelta { delta: c, block_index: 0 }),
472                            SseProcessResult::ToolCallUpdate { index, id, name, arguments } => apply_tool_call_update(&mut tool_calls, index, id, name, arguments),
473                            SseProcessResult::Usage(u) => usage = Some(u),
474                            SseProcessResult::Done(sr) => {
475                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
476                                return;
477                            }
478                            SseProcessResult::Sentinel => {
479                                let sr = if tool_calls.is_empty() { StopReason::EndTurn } else { StopReason::ToolUse };
480                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
481                                return;
482                            }
483                        }
484                    }
485                }
486            }
487
488            // Stream ended without [DONE] - emit what we have
489            for delta in build_stream_end_deltas(&tool_calls, usage, StopReason::EndTurn) {
490                yield Ok(delta);
491            }
492        })
493    }
494
495    fn model(&self) -> &str {
496        &self.model
497    }
498
499    fn provider(&self) -> &'static str {
500        "openai"
501    }
502
503    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
504        self.thinking.as_ref()
505    }
506}
507
508/// Apply a tool call update to the accumulator.
509fn apply_tool_call_update(
510    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
511    index: usize,
512    id: Option<String>,
513    name: Option<String>,
514    arguments: Option<String>,
515) {
516    let entry = tool_calls
517        .entry(index)
518        .or_insert_with(|| ToolCallAccumulator {
519            id: String::new(),
520            name: String::new(),
521            arguments: String::new(),
522        });
523    if let Some(id) = id {
524        entry.id = id;
525    }
526    if let Some(name) = name {
527        entry.name = name;
528    }
529    if let Some(args) = arguments {
530        entry.arguments.push_str(&args);
531    }
532}
533
534/// Helper to emit tool call deltas and done event.
535fn build_stream_end_deltas(
536    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
537    usage: Option<Usage>,
538    stop_reason: StopReason,
539) -> Vec<StreamDelta> {
540    let mut deltas = Vec::new();
541
542    // Emit tool calls
543    for (idx, tool) in tool_calls {
544        deltas.push(StreamDelta::ToolUseStart {
545            id: tool.id.clone(),
546            name: tool.name.clone(),
547            block_index: *idx + 1,
548            thought_signature: None,
549        });
550        deltas.push(StreamDelta::ToolInputDelta {
551            id: tool.id.clone(),
552            delta: tool.arguments.clone(),
553            block_index: *idx + 1,
554        });
555    }
556
557    // Emit usage
558    if let Some(u) = usage {
559        deltas.push(StreamDelta::Usage(u));
560    }
561
562    // Emit done
563    deltas.push(StreamDelta::Done {
564        stop_reason: Some(stop_reason),
565    });
566
567    deltas
568}
569
/// Result of processing an SSE chunk.
enum SseProcessResult {
    /// Emit a text delta.
    TextDelta(String),
    /// Update tool call accumulator (index, optional id, optional name, optional args).
    ToolCallUpdate {
        index: usize,
        id: Option<String>,
        name: Option<String>,
        arguments: Option<String>,
    },
    /// Usage information.
    Usage(Usage),
    /// Stream is done with a stop reason (mapped from `finish_reason`).
    Done(StopReason),
    /// Stream sentinel [DONE] was received.
    Sentinel,
}

589/// Process an SSE data line and return results to apply.
590fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
591    if data == "[DONE]" {
592        return vec![SseProcessResult::Sentinel];
593    }
594
595    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
596        return vec![];
597    };
598
599    let mut results = Vec::new();
600
601    // Extract usage if present
602    if let Some(u) = chunk.usage {
603        results.push(SseProcessResult::Usage(Usage {
604            input_tokens: u.prompt_tokens,
605            output_tokens: u.completion_tokens,
606        }));
607    }
608
609    // Process choices
610    if let Some(choice) = chunk.choices.into_iter().next() {
611        // Handle text content delta
612        if let Some(content) = choice.delta.content
613            && !content.is_empty()
614        {
615            results.push(SseProcessResult::TextDelta(content));
616        }
617
618        // Handle tool call deltas
619        if let Some(tc_deltas) = choice.delta.tool_calls {
620            for tc in tc_deltas {
621                results.push(SseProcessResult::ToolCallUpdate {
622                    index: tc.index,
623                    id: tc.id,
624                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
625                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
626                });
627            }
628        }
629
630        // Check for finish reason
631        if let Some(finish_reason) = choice.finish_reason {
632            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
633        }
634    }
635
636    results
637}
638
/// Whether the endpoint needs the legacy `max_tokens` field in addition to
/// `max_completion_tokens` (Moonshot, z.ai, and MiniMax do).
fn use_max_tokens_alias(base_url: &str) -> bool {
    const ALIAS_HOSTS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];
    ALIAS_HOSTS.iter().any(|host| base_url.contains(host))
}

645fn map_finish_reason(finish_reason: &str) -> StopReason {
646    match finish_reason {
647        "stop" => StopReason::EndTurn,
648        "tool_calls" => StopReason::ToolUse,
649        "length" => StopReason::MaxTokens,
650        "content_filter" | "network_error" => StopReason::StopSequence,
651        "sensitive" => StopReason::Refusal,
652        unknown => {
653            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
654            StopReason::StopSequence
655        }
656    }
657}
658
659fn build_api_chat_request<'a>(
660    model: &'a str,
661    messages: &'a [ApiMessage],
662    max_tokens: u32,
663    tools: Option<&'a [ApiTool]>,
664    reasoning: Option<ApiReasoning>,
665    include_max_tokens_alias: bool,
666) -> ApiChatRequest<'a> {
667    ApiChatRequest {
668        model,
669        messages,
670        max_completion_tokens: Some(max_tokens),
671        max_tokens: include_max_tokens_alias.then_some(max_tokens),
672        tools,
673        reasoning,
674    }
675}
676
677fn build_api_chat_request_streaming<'a>(
678    model: &'a str,
679    messages: &'a [ApiMessage],
680    max_tokens: u32,
681    tools: Option<&'a [ApiTool]>,
682    reasoning: Option<ApiReasoning>,
683    include_max_tokens_alias: bool,
684) -> ApiChatRequestStreaming<'a> {
685    ApiChatRequestStreaming {
686        model,
687        messages,
688        max_completion_tokens: Some(max_tokens),
689        max_tokens: include_max_tokens_alias.then_some(max_tokens),
690        tools,
691        reasoning,
692        stream: true,
693    }
694}
695
696fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
697    thinking
698        .and_then(resolve_reasoning_effort)
699        .map(|effort| ApiReasoning { effort })
700}
701
702const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
703    if let Some(effort) = config.effort {
704        return Some(map_effort(effort));
705    }
706
707    match &config.mode {
708        ThinkingMode::Adaptive => None,
709        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
710    }
711}
712
713const fn map_effort(effort: Effort) -> ReasoningEffort {
714    match effort {
715        Effort::Low => ReasoningEffort::Low,
716        Effort::Medium => ReasoningEffort::Medium,
717        Effort::High => ReasoningEffort::High,
718        Effort::Max => ReasoningEffort::XHigh,
719    }
720}
721
722const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
723    if budget_tokens <= 4_096 {
724        ReasoningEffort::Low
725    } else if budget_tokens <= 16_384 {
726        ReasoningEffort::Medium
727    } else if budget_tokens <= 32_768 {
728        ReasoningEffort::High
729    } else {
730        ReasoningEffort::XHigh
731    }
732}
733
/// Convert an SDK [`ChatRequest`] into the OpenAI Chat Completions message list.
///
/// Mapping rules:
/// - A non-empty `request.system` becomes a leading `system` message.
/// - `Content::Text` maps 1:1 onto a user/assistant message.
/// - `Content::Blocks` is flattened: text blocks are joined with `\n`,
///   `ToolUse` blocks become `tool_calls` on the message, and each
///   `ToolResult` block becomes its own `tool`-role message. Thinking,
///   redacted-thinking, image, and document blocks are dropped.
/// - `ToolResult` messages are pushed while scanning the blocks, so they
///   appear *before* the text/tool-call message built from the same SDK message.
fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
    let mut messages = Vec::new();

    // Add system message first (OpenAI uses a separate message for system prompt)
    if !request.system.is_empty() {
        messages.push(ApiMessage {
            role: ApiRole::System,
            content: Some(request.system.clone()),
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Convert SDK messages to OpenAI format
    for msg in &request.messages {
        match &msg.content {
            Content::Text(text) => {
                messages.push(ApiMessage {
                    role: match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    },
                    content: Some(text.clone()),
                    tool_calls: None,
                    tool_call_id: None,
                });
            }
            Content::Blocks(blocks) => {
                // Handle mixed content blocks
                let mut text_parts = Vec::new();
                let mut tool_calls = Vec::new();

                for block in blocks {
                    match block {
                        ContentBlock::Text { text } => {
                            text_parts.push(text.clone());
                        }
                        ContentBlock::Thinking { .. }
                        | ContentBlock::RedactedThinking { .. }
                        | ContentBlock::Image { .. }
                        | ContentBlock::Document { .. } => {
                            // These blocks are not sent to the OpenAI API
                        }
                        ContentBlock::ToolUse {
                            id, name, input, ..
                        } => {
                            // Tool input must be a JSON string on the wire;
                            // fall back to "{}" if serialization fails.
                            tool_calls.push(ApiToolCall {
                                id: id.clone(),
                                r#type: "function".to_owned(),
                                function: ApiFunctionCall {
                                    name: name.clone(),
                                    arguments: serde_json::to_string(input)
                                        .unwrap_or_else(|_| "{}".to_owned()),
                                },
                            });
                        }
                        ContentBlock::ToolResult {
                            tool_use_id,
                            content,
                            ..
                        } => {
                            // Tool results are separate messages in OpenAI
                            messages.push(ApiMessage {
                                role: ApiRole::Tool,
                                content: Some(content.clone()),
                                tool_calls: None,
                                tool_call_id: Some(tool_use_id.clone()),
                            });
                        }
                    }
                }

                // Add assistant message with text and/or tool calls
                if !text_parts.is_empty() || !tool_calls.is_empty() {
                    let role = match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    };

                    // Only add if it's an assistant message or has text content
                    // (a user message whose blocks were all tool results would
                    // otherwise become an empty user message).
                    if role == ApiRole::Assistant || !text_parts.is_empty() {
                        messages.push(ApiMessage {
                            role,
                            content: if text_parts.is_empty() {
                                None
                            } else {
                                Some(text_parts.join("\n"))
                            },
                            tool_calls: if tool_calls.is_empty() {
                                None
                            } else {
                                Some(tool_calls)
                            },
                            tool_call_id: None,
                        });
                    }
                }
            }
        }
    }

    messages
}

838fn convert_tool(t: crate::llm::Tool) -> ApiTool {
839    ApiTool {
840        r#type: "function".to_owned(),
841        function: ApiFunction {
842            name: t.name,
843            description: t.description,
844            parameters: t.input_schema,
845        },
846    }
847}
848
849fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
850    let mut blocks = Vec::new();
851
852    // Add text content if present
853    if let Some(content) = &message.content
854        && !content.is_empty()
855    {
856        blocks.push(ContentBlock::Text {
857            text: content.clone(),
858        });
859    }
860
861    // Add tool calls if present
862    if let Some(tool_calls) = &message.tool_calls {
863        for tc in tool_calls {
864            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
865                .unwrap_or_else(|_| serde_json::json!({}));
866            blocks.push(ContentBlock::ToolUse {
867                id: tc.id.clone(),
868                name: tc.function.name.clone(),
869                input,
870                thought_signature: None,
871            });
872        }
873    }
874
875    blocks
876}
877
878// ============================================================================
879// API Request Types
880// ============================================================================
881
/// Request body for the non-streaming Chat Completions endpoint.
///
/// `max_completion_tokens` is the current `OpenAI` field; `max_tokens` is the
/// legacy alias some `OpenAI`-compatible vendors still expect (see
/// `use_max_tokens_alias`). `None` fields are omitted from the JSON entirely.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
}

/// Same shape as [`ApiChatRequest`] plus the `stream` flag for SSE responses.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    stream: bool,
}

/// Reasoning effort levels for the request-level `reasoning.effort` field.
/// Serialized lowercase; `XHigh` maps to the wire value `"xhigh"`.
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    #[serde(rename = "xhigh")]
    XHigh,
}

/// Wrapper for the request-level `reasoning` object.
#[derive(Serialize)]
struct ApiReasoning {
    effort: ReasoningEffort,
}
925
/// One element of the Chat Completions `messages` array.
///
/// `content` may be `None` for assistant turns that carry only tool calls;
/// `tool_call_id` is populated only on `role: "tool"` result messages.
/// `None` fields are omitted from the serialized JSON.
#[derive(Serialize)]
struct ApiMessage {
    role: ApiRole,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}

/// Chat Completions message roles; serialized lowercase
/// (`"system"`, `"user"`, `"assistant"`, `"tool"`).
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    System,
    User,
    Assistant,
    Tool,
}

/// A tool invocation attached to an assistant message; `r#type` is
/// `"function"` for function calling (see `convert_tool`).
#[derive(Serialize)]
struct ApiToolCall {
    id: String,
    r#type: String,
    function: ApiFunctionCall,
}

/// Function name plus its arguments as a raw JSON string, mirroring the
/// wire format (arguments are NOT a nested JSON object).
#[derive(Serialize)]
struct ApiFunctionCall {
    name: String,
    arguments: String,
}

/// A tool definition advertised in the request's `tools` array.
#[derive(Serialize)]
struct ApiTool {
    r#type: String,
    function: ApiFunction,
}

/// Function metadata: name, description, and a JSON-schema `parameters` object.
#[derive(Serialize)]
struct ApiFunction {
    name: String,
    description: String,
    parameters: serde_json::Value,
}
971
972// ============================================================================
973// API Response Types
974// ============================================================================
975
/// Top-level non-streaming Chat Completions response.
#[derive(Deserialize)]
struct ApiChatResponse {
    id: String,
    choices: Vec<ApiChoice>,
    model: String,
    usage: ApiUsage,
}

/// One completion choice. `finish_reason` is kept as a free-form string
/// because vendors emit custom values; it is mapped to `StopReason` by
/// `map_finish_reason`.
#[derive(Deserialize)]
struct ApiChoice {
    message: ApiResponseMessage,
    finish_reason: Option<String>,
}

/// Assistant message payload: text content and/or requested tool calls,
/// either of which may be absent.
#[derive(Deserialize)]
struct ApiResponseMessage {
    content: Option<String>,
    tool_calls: Option<Vec<ApiResponseToolCall>>,
}

/// A tool call requested by the model.
#[derive(Deserialize)]
struct ApiResponseToolCall {
    id: String,
    function: ApiResponseFunctionCall,
}

/// Called function name plus its arguments as a raw JSON string.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    name: String,
    arguments: String,
}

/// Token usage counters. The custom deserializer tolerates vendors that
/// emit these as integer-valued floats (e.g. `100.0`).
#[derive(Deserialize)]
struct ApiUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
}
1015
1016// ============================================================================
1017// SSE Streaming Types
1018// ============================================================================
1019
/// Accumulator for tool call state across stream deltas.
///
/// Tool calls stream piecemeal: the id/name arrive in one delta and the
/// `arguments` string is appended across subsequent deltas (see the
/// `SseToolCallDelta` tests below).
struct ToolCallAccumulator {
    id: String,
    name: String,
    arguments: String,
}

/// A single chunk in `OpenAI`'s SSE stream.
#[derive(Deserialize)]
struct SseChunk {
    choices: Vec<SseChoice>,
    // Usually absent; `#[serde(default)]` tolerates chunks without it.
    #[serde(default)]
    usage: Option<SseUsage>,
}

/// One streamed choice: an incremental delta plus an optional finish reason.
#[derive(Deserialize)]
struct SseChoice {
    delta: SseDelta,
    finish_reason: Option<String>,
}

/// Incremental message content: text and/or partial tool-call fragments.
#[derive(Deserialize)]
struct SseDelta {
    content: Option<String>,
    tool_calls: Option<Vec<SseToolCallDelta>>,
}

/// Fragment of a streamed tool call. `index` identifies which accumulating
/// call the fragment belongs to; `id` and `function` may each be absent on
/// continuation fragments.
#[derive(Deserialize)]
struct SseToolCallDelta {
    index: usize,
    id: Option<String>,
    function: Option<SseFunctionDelta>,
}

/// Partial function data within a tool-call fragment.
#[derive(Deserialize)]
struct SseFunctionDelta {
    name: Option<String>,
    arguments: Option<String>,
}

/// Usage counters carried on a stream chunk; same float-tolerant parsing
/// as `ApiUsage`.
#[derive(Deserialize)]
struct SseUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
}
1067
1068fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1069where
1070    D: serde::Deserializer<'de>,
1071{
1072    #[derive(Deserialize)]
1073    #[serde(untagged)]
1074    enum NumberLike {
1075        U64(u64),
1076        F64(f64),
1077    }
1078
1079    match NumberLike::deserialize(deserializer)? {
1080        NumberLike::U64(v) => u32::try_from(v)
1081            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1082        NumberLike::F64(v) => {
1083            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1084                v.to_string().parse::<u32>().map_err(|e| {
1085                    D::Error::custom(format!(
1086                        "failed to convert integer-compatible token count {v} to u32: {e}"
1087                    ))
1088                })
1089            } else {
1090                Err(D::Error::custom(format!(
1091                    "token count must be a non-negative integer-compatible number, got {v}"
1092                )))
1093            }
1094        }
1095    }
1096}
1097
1098#[cfg(test)]
1099mod tests {
1100    use super::*;
1101
1102    // ===================
1103    // Constructor Tests
1104    // ===================
1105
1106    #[test]
1107    fn test_new_creates_provider_with_custom_model() {
1108        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1109
1110        assert_eq!(provider.model(), "custom-model");
1111        assert_eq!(provider.provider(), "openai");
1112        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1113    }
1114
1115    #[test]
1116    fn test_with_base_url_creates_provider_with_custom_url() {
1117        let provider = OpenAIProvider::with_base_url(
1118            "test-api-key".to_string(),
1119            "llama3".to_string(),
1120            "http://localhost:11434/v1".to_string(),
1121        );
1122
1123        assert_eq!(provider.model(), "llama3");
1124        assert_eq!(provider.base_url, "http://localhost:11434/v1");
1125    }
1126
1127    #[test]
1128    fn test_gpt4o_factory_creates_gpt4o_provider() {
1129        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1130
1131        assert_eq!(provider.model(), MODEL_GPT4O);
1132        assert_eq!(provider.provider(), "openai");
1133    }
1134
1135    #[test]
1136    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1137        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1138
1139        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1140        assert_eq!(provider.provider(), "openai");
1141    }
1142
1143    #[test]
1144    fn test_gpt52_thinking_factory_creates_provider() {
1145        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1146
1147        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1148        assert_eq!(provider.provider(), "openai");
1149    }
1150
1151    #[test]
1152    fn test_gpt54_factory_creates_provider() {
1153        let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1154
1155        assert_eq!(provider.model(), MODEL_GPT54);
1156        assert_eq!(provider.provider(), "openai");
1157    }
1158
1159    #[test]
1160    fn test_gpt53_codex_factory_creates_provider() {
1161        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1162
1163        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1164        assert_eq!(provider.provider(), "openai");
1165    }
1166
1167    #[test]
1168    fn test_codex_factory_points_to_latest_codex_model() {
1169        let provider = OpenAIProvider::codex("test-api-key".to_string());
1170
1171        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1172        assert_eq!(provider.provider(), "openai");
1173    }
1174
1175    #[test]
1176    fn test_gpt5_factory_creates_gpt5_provider() {
1177        let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1178
1179        assert_eq!(provider.model(), MODEL_GPT5);
1180        assert_eq!(provider.provider(), "openai");
1181    }
1182
1183    #[test]
1184    fn test_gpt5_mini_factory_creates_provider() {
1185        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1186
1187        assert_eq!(provider.model(), MODEL_GPT5_MINI);
1188        assert_eq!(provider.provider(), "openai");
1189    }
1190
1191    #[test]
1192    fn test_o3_factory_creates_o3_provider() {
1193        let provider = OpenAIProvider::o3("test-api-key".to_string());
1194
1195        assert_eq!(provider.model(), MODEL_O3);
1196        assert_eq!(provider.provider(), "openai");
1197    }
1198
1199    #[test]
1200    fn test_o4_mini_factory_creates_o4_mini_provider() {
1201        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1202
1203        assert_eq!(provider.model(), MODEL_O4_MINI);
1204        assert_eq!(provider.provider(), "openai");
1205    }
1206
1207    #[test]
1208    fn test_o1_factory_creates_o1_provider() {
1209        let provider = OpenAIProvider::o1("test-api-key".to_string());
1210
1211        assert_eq!(provider.model(), MODEL_O1);
1212        assert_eq!(provider.provider(), "openai");
1213    }
1214
1215    #[test]
1216    fn test_gpt41_factory_creates_gpt41_provider() {
1217        let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1218
1219        assert_eq!(provider.model(), MODEL_GPT41);
1220        assert_eq!(provider.provider(), "openai");
1221    }
1222
1223    #[test]
1224    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1225        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1226
1227        assert_eq!(provider.model(), "kimi-custom");
1228        assert_eq!(provider.base_url, BASE_URL_KIMI);
1229        assert_eq!(provider.provider(), "openai");
1230    }
1231
1232    #[test]
1233    fn test_kimi_k2_5_factory_creates_provider() {
1234        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1235
1236        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1237        assert_eq!(provider.base_url, BASE_URL_KIMI);
1238        assert_eq!(provider.provider(), "openai");
1239    }
1240
1241    #[test]
1242    fn test_kimi_k2_thinking_factory_creates_provider() {
1243        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1244
1245        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1246        assert_eq!(provider.base_url, BASE_URL_KIMI);
1247        assert_eq!(provider.provider(), "openai");
1248    }
1249
1250    #[test]
1251    fn test_zai_factory_creates_provider_with_zai_base_url() {
1252        let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());
1253
1254        assert_eq!(provider.model(), "glm-custom");
1255        assert_eq!(provider.base_url, BASE_URL_ZAI);
1256        assert_eq!(provider.provider(), "openai");
1257    }
1258
1259    #[test]
1260    fn test_zai_glm5_factory_creates_provider() {
1261        let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());
1262
1263        assert_eq!(provider.model(), MODEL_ZAI_GLM5);
1264        assert_eq!(provider.base_url, BASE_URL_ZAI);
1265        assert_eq!(provider.provider(), "openai");
1266    }
1267
1268    #[test]
1269    fn test_minimax_factory_creates_provider_with_minimax_base_url() {
1270        let provider =
1271            OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());
1272
1273        assert_eq!(provider.model(), "minimax-custom");
1274        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1275        assert_eq!(provider.provider(), "openai");
1276    }
1277
1278    #[test]
1279    fn test_minimax_m2_5_factory_creates_provider() {
1280        let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());
1281
1282        assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
1283        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1284        assert_eq!(provider.provider(), "openai");
1285    }
1286
1287    // ===================
1288    // Model Constants Tests
1289    // ===================
1290
1291    #[test]
1292    fn test_model_constants_have_expected_values() {
1293        // GPT-5.4 / GPT-5.3 Codex
1294        assert_eq!(MODEL_GPT54, "gpt-5.4");
1295        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
1296        // GPT-5.2 series
1297        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
1298        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
1299        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
1300        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
1301        // GPT-5 series
1302        assert_eq!(MODEL_GPT5, "gpt-5");
1303        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
1304        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
1305        // o-series
1306        assert_eq!(MODEL_O3, "o3");
1307        assert_eq!(MODEL_O3_MINI, "o3-mini");
1308        assert_eq!(MODEL_O4_MINI, "o4-mini");
1309        assert_eq!(MODEL_O1, "o1");
1310        assert_eq!(MODEL_O1_MINI, "o1-mini");
1311        // GPT-4.1 series
1312        assert_eq!(MODEL_GPT41, "gpt-4.1");
1313        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
1314        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
1315        // GPT-4o series
1316        assert_eq!(MODEL_GPT4O, "gpt-4o");
1317        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
1318        // OpenAI-compatible vendor defaults
1319        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
1320        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
1321        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
1322        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
1323        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
1324        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
1325        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
1326    }
1327
1328    // ===================
1329    // Clone Tests
1330    // ===================
1331
1332    #[test]
1333    fn test_provider_is_cloneable() {
1334        let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
1335        let cloned = provider.clone();
1336
1337        assert_eq!(provider.model(), cloned.model());
1338        assert_eq!(provider.provider(), cloned.provider());
1339        assert_eq!(provider.base_url, cloned.base_url);
1340    }
1341
1342    // ===================
1343    // API Type Serialization Tests
1344    // ===================
1345
1346    #[test]
1347    fn test_api_role_serialization() {
1348        let system_role = ApiRole::System;
1349        let user_role = ApiRole::User;
1350        let assistant_role = ApiRole::Assistant;
1351        let tool_role = ApiRole::Tool;
1352
1353        assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
1354        assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
1355        assert_eq!(
1356            serde_json::to_string(&assistant_role).unwrap(),
1357            "\"assistant\""
1358        );
1359        assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
1360    }
1361
1362    #[test]
1363    fn test_api_message_serialization_simple() {
1364        let message = ApiMessage {
1365            role: ApiRole::User,
1366            content: Some("Hello, world!".to_string()),
1367            tool_calls: None,
1368            tool_call_id: None,
1369        };
1370
1371        let json = serde_json::to_string(&message).unwrap();
1372        assert!(json.contains("\"role\":\"user\""));
1373        assert!(json.contains("\"content\":\"Hello, world!\""));
1374        // Optional fields should be omitted
1375        assert!(!json.contains("tool_calls"));
1376        assert!(!json.contains("tool_call_id"));
1377    }
1378
1379    #[test]
1380    fn test_api_message_serialization_with_tool_calls() {
1381        let message = ApiMessage {
1382            role: ApiRole::Assistant,
1383            content: Some("Let me help.".to_string()),
1384            tool_calls: Some(vec![ApiToolCall {
1385                id: "call_123".to_string(),
1386                r#type: "function".to_string(),
1387                function: ApiFunctionCall {
1388                    name: "read_file".to_string(),
1389                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
1390                },
1391            }]),
1392            tool_call_id: None,
1393        };
1394
1395        let json = serde_json::to_string(&message).unwrap();
1396        assert!(json.contains("\"role\":\"assistant\""));
1397        assert!(json.contains("\"tool_calls\""));
1398        assert!(json.contains("\"id\":\"call_123\""));
1399        assert!(json.contains("\"type\":\"function\""));
1400        assert!(json.contains("\"name\":\"read_file\""));
1401    }
1402
1403    #[test]
1404    fn test_api_tool_message_serialization() {
1405        let message = ApiMessage {
1406            role: ApiRole::Tool,
1407            content: Some("File contents here".to_string()),
1408            tool_calls: None,
1409            tool_call_id: Some("call_123".to_string()),
1410        };
1411
1412        let json = serde_json::to_string(&message).unwrap();
1413        assert!(json.contains("\"role\":\"tool\""));
1414        assert!(json.contains("\"tool_call_id\":\"call_123\""));
1415        assert!(json.contains("\"content\":\"File contents here\""));
1416    }
1417
1418    #[test]
1419    fn test_api_tool_serialization() {
1420        let tool = ApiTool {
1421            r#type: "function".to_string(),
1422            function: ApiFunction {
1423                name: "test_tool".to_string(),
1424                description: "A test tool".to_string(),
1425                parameters: serde_json::json!({
1426                    "type": "object",
1427                    "properties": {
1428                        "arg": {"type": "string"}
1429                    }
1430                }),
1431            },
1432        };
1433
1434        let json = serde_json::to_string(&tool).unwrap();
1435        assert!(json.contains("\"type\":\"function\""));
1436        assert!(json.contains("\"name\":\"test_tool\""));
1437        assert!(json.contains("\"description\":\"A test tool\""));
1438        assert!(json.contains("\"parameters\""));
1439    }
1440
1441    // ===================
1442    // API Type Deserialization Tests
1443    // ===================
1444
1445    #[test]
1446    fn test_api_response_deserialization() {
1447        let json = r#"{
1448            "id": "chatcmpl-123",
1449            "choices": [
1450                {
1451                    "message": {
1452                        "content": "Hello!"
1453                    },
1454                    "finish_reason": "stop"
1455                }
1456            ],
1457            "model": "gpt-4o",
1458            "usage": {
1459                "prompt_tokens": 100,
1460                "completion_tokens": 50
1461            }
1462        }"#;
1463
1464        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1465        assert_eq!(response.id, "chatcmpl-123");
1466        assert_eq!(response.model, "gpt-4o");
1467        assert_eq!(response.usage.prompt_tokens, 100);
1468        assert_eq!(response.usage.completion_tokens, 50);
1469        assert_eq!(response.choices.len(), 1);
1470        assert_eq!(
1471            response.choices[0].message.content,
1472            Some("Hello!".to_string())
1473        );
1474    }
1475
1476    #[test]
1477    fn test_api_response_with_tool_calls_deserialization() {
1478        let json = r#"{
1479            "id": "chatcmpl-456",
1480            "choices": [
1481                {
1482                    "message": {
1483                        "content": null,
1484                        "tool_calls": [
1485                            {
1486                                "id": "call_abc",
1487                                "type": "function",
1488                                "function": {
1489                                    "name": "read_file",
1490                                    "arguments": "{\"path\": \"test.txt\"}"
1491                                }
1492                            }
1493                        ]
1494                    },
1495                    "finish_reason": "tool_calls"
1496                }
1497            ],
1498            "model": "gpt-4o",
1499            "usage": {
1500                "prompt_tokens": 150,
1501                "completion_tokens": 30
1502            }
1503        }"#;
1504
1505        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1506        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
1507        assert_eq!(tool_calls.len(), 1);
1508        assert_eq!(tool_calls[0].id, "call_abc");
1509        assert_eq!(tool_calls[0].function.name, "read_file");
1510    }
1511
1512    #[test]
1513    fn test_api_response_with_unknown_finish_reason_deserialization() {
1514        let json = r#"{
1515            "id": "chatcmpl-789",
1516            "choices": [
1517                {
1518                    "message": {
1519                        "content": "ok"
1520                    },
1521                    "finish_reason": "vendor_custom_reason"
1522                }
1523            ],
1524            "model": "glm-5",
1525            "usage": {
1526                "prompt_tokens": 10,
1527                "completion_tokens": 5
1528            }
1529        }"#;
1530
1531        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1532        assert_eq!(
1533            response.choices[0].finish_reason.as_deref(),
1534            Some("vendor_custom_reason")
1535        );
1536        assert_eq!(
1537            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
1538            StopReason::StopSequence
1539        );
1540    }
1541
1542    #[test]
1543    fn test_map_finish_reason_covers_vendor_specific_values() {
1544        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
1545        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
1546        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
1547        assert_eq!(
1548            map_finish_reason("content_filter"),
1549            StopReason::StopSequence
1550        );
1551        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
1552        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
1553        assert_eq!(
1554            map_finish_reason("some_new_reason"),
1555            StopReason::StopSequence
1556        );
1557    }
1558
1559    // ===================
1560    // Message Conversion Tests
1561    // ===================
1562
1563    #[test]
1564    fn test_build_api_messages_with_system() {
1565        let request = ChatRequest {
1566            system: "You are helpful.".to_string(),
1567            messages: vec![crate::llm::Message::user("Hello")],
1568            tools: None,
1569            max_tokens: 1024,
1570            thinking: None,
1571        };
1572
1573        let api_messages = build_api_messages(&request);
1574        assert_eq!(api_messages.len(), 2);
1575        assert_eq!(api_messages[0].role, ApiRole::System);
1576        assert_eq!(
1577            api_messages[0].content,
1578            Some("You are helpful.".to_string())
1579        );
1580        assert_eq!(api_messages[1].role, ApiRole::User);
1581        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
1582    }
1583
1584    #[test]
1585    fn test_build_api_messages_empty_system() {
1586        let request = ChatRequest {
1587            system: String::new(),
1588            messages: vec![crate::llm::Message::user("Hello")],
1589            tools: None,
1590            max_tokens: 1024,
1591            thinking: None,
1592        };
1593
1594        let api_messages = build_api_messages(&request);
1595        assert_eq!(api_messages.len(), 1);
1596        assert_eq!(api_messages[0].role, ApiRole::User);
1597    }
1598
1599    #[test]
1600    fn test_convert_tool() {
1601        let tool = crate::llm::Tool {
1602            name: "test_tool".to_string(),
1603            description: "A test tool".to_string(),
1604            input_schema: serde_json::json!({"type": "object"}),
1605        };
1606
1607        let api_tool = convert_tool(tool);
1608        assert_eq!(api_tool.r#type, "function");
1609        assert_eq!(api_tool.function.name, "test_tool");
1610        assert_eq!(api_tool.function.description, "A test tool");
1611    }
1612
1613    #[test]
1614    fn test_build_content_blocks_text_only() {
1615        let message = ApiResponseMessage {
1616            content: Some("Hello!".to_string()),
1617            tool_calls: None,
1618        };
1619
1620        let blocks = build_content_blocks(&message);
1621        assert_eq!(blocks.len(), 1);
1622        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
1623    }
1624
1625    #[test]
1626    fn test_build_content_blocks_with_tool_calls() {
1627        let message = ApiResponseMessage {
1628            content: Some("Let me help.".to_string()),
1629            tool_calls: Some(vec![ApiResponseToolCall {
1630                id: "call_123".to_string(),
1631                function: ApiResponseFunctionCall {
1632                    name: "read_file".to_string(),
1633                    arguments: "{\"path\": \"test.txt\"}".to_string(),
1634                },
1635            }]),
1636        };
1637
1638        let blocks = build_content_blocks(&message);
1639        assert_eq!(blocks.len(), 2);
1640        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
1641        assert!(
1642            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
1643        );
1644    }
1645
1646    // ===================
1647    // SSE Streaming Type Tests
1648    // ===================
1649
1650    #[test]
1651    fn test_sse_chunk_text_delta_deserialization() {
1652        let json = r#"{
1653            "choices": [{
1654                "delta": {
1655                    "content": "Hello"
1656                },
1657                "finish_reason": null
1658            }]
1659        }"#;
1660
1661        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1662        assert_eq!(chunk.choices.len(), 1);
1663        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
1664        assert!(chunk.choices[0].finish_reason.is_none());
1665    }
1666
1667    #[test]
1668    fn test_sse_chunk_tool_call_delta_deserialization() {
1669        let json = r#"{
1670            "choices": [{
1671                "delta": {
1672                    "tool_calls": [{
1673                        "index": 0,
1674                        "id": "call_abc",
1675                        "function": {
1676                            "name": "read_file",
1677                            "arguments": ""
1678                        }
1679                    }]
1680                },
1681                "finish_reason": null
1682            }]
1683        }"#;
1684
1685        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1686        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1687        assert_eq!(tool_calls.len(), 1);
1688        assert_eq!(tool_calls[0].index, 0);
1689        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
1690        assert_eq!(
1691            tool_calls[0].function.as_ref().unwrap().name,
1692            Some("read_file".to_string())
1693        );
1694    }
1695
1696    #[test]
1697    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
1698        let json = r#"{
1699            "choices": [{
1700                "delta": {
1701                    "tool_calls": [{
1702                        "index": 0,
1703                        "function": {
1704                            "arguments": "{\"path\":"
1705                        }
1706                    }]
1707                },
1708                "finish_reason": null
1709            }]
1710        }"#;
1711
1712        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1713        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1714        assert_eq!(tool_calls[0].id, None);
1715        assert_eq!(
1716            tool_calls[0].function.as_ref().unwrap().arguments,
1717            Some("{\"path\":".to_string())
1718        );
1719    }
1720
1721    #[test]
1722    fn test_sse_chunk_with_finish_reason_deserialization() {
1723        let json = r#"{
1724            "choices": [{
1725                "delta": {},
1726                "finish_reason": "stop"
1727            }]
1728        }"#;
1729
1730        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1731        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
1732    }
1733
1734    #[test]
1735    fn test_sse_chunk_with_usage_deserialization() {
1736        let json = r#"{
1737            "choices": [{
1738                "delta": {},
1739                "finish_reason": "stop"
1740            }],
1741            "usage": {
1742                "prompt_tokens": 100,
1743                "completion_tokens": 50
1744            }
1745        }"#;
1746
1747        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1748        let usage = chunk.usage.unwrap();
1749        assert_eq!(usage.prompt_tokens, 100);
1750        assert_eq!(usage.completion_tokens, 50);
1751    }
1752
1753    #[test]
1754    fn test_sse_chunk_with_float_usage_deserialization() {
1755        let json = r#"{
1756            "choices": [{
1757                "delta": {},
1758                "finish_reason": "stop"
1759            }],
1760            "usage": {
1761                "prompt_tokens": 100.0,
1762                "completion_tokens": 50.0
1763            }
1764        }"#;
1765
1766        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1767        let usage = chunk.usage.unwrap();
1768        assert_eq!(usage.prompt_tokens, 100);
1769        assert_eq!(usage.completion_tokens, 50);
1770    }
1771
1772    #[test]
1773    fn test_api_usage_deserializes_integer_compatible_numbers() {
1774        let json = r#"{
1775            "prompt_tokens": 42.0,
1776            "completion_tokens": 7
1777        }"#;
1778
1779        let usage: ApiUsage = serde_json::from_str(json).unwrap();
1780        assert_eq!(usage.prompt_tokens, 42);
1781        assert_eq!(usage.completion_tokens, 7);
1782    }
1783
1784    #[test]
1785    fn test_api_usage_rejects_fractional_numbers() {
1786        let json = r#"{
1787            "prompt_tokens": 42.5,
1788            "completion_tokens": 7
1789        }"#;
1790
1791        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
1792        assert!(usage.is_err());
1793    }
1794
1795    #[test]
1796    fn test_use_max_tokens_alias_for_vendor_urls() {
1797        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
1798        assert!(use_max_tokens_alias(BASE_URL_KIMI));
1799        assert!(use_max_tokens_alias(BASE_URL_ZAI));
1800        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
1801    }
1802
1803    #[test]
1804    fn test_requires_responses_api_only_for_legacy_codex_model() {
1805        assert!(requires_responses_api(MODEL_GPT52_CODEX));
1806        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
1807        assert!(!requires_responses_api(MODEL_GPT54));
1808    }
1809
1810    #[test]
1811    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
1812        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
1813        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
1814    }
1815
1816    #[test]
1817    fn test_build_api_reasoning_uses_explicit_effort() {
1818        let reasoning =
1819            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
1820        assert!(matches!(reasoning.effort, ReasoningEffort::High));
1821    }
1822
1823    #[test]
1824    fn test_build_api_reasoning_omits_adaptive_without_effort() {
1825        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
1826    }
1827
1828    #[test]
1829    fn test_openai_rejects_adaptive_thinking() {
1830        let provider = OpenAIProvider::gpt54("test-key".to_string());
1831        let error = provider
1832            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
1833            .unwrap_err();
1834        assert!(
1835            error
1836                .to_string()
1837                .contains("adaptive thinking is not supported")
1838        );
1839    }
1840
1841    #[test]
1842    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
1843        let messages = vec![ApiMessage {
1844            role: ApiRole::User,
1845            content: Some("Hello".to_string()),
1846            tool_calls: None,
1847            tool_call_id: None,
1848        }];
1849
1850        let request = ApiChatRequest {
1851            model: "gpt-4o",
1852            messages: &messages,
1853            max_completion_tokens: Some(1024),
1854            max_tokens: None,
1855            tools: None,
1856            reasoning: None,
1857        };
1858
1859        let json = serde_json::to_string(&request).unwrap();
1860        assert!(json.contains("\"max_completion_tokens\":1024"));
1861        assert!(!json.contains("\"max_tokens\""));
1862    }
1863
1864    #[test]
1865    fn test_request_serialization_with_max_tokens_alias() {
1866        let messages = vec![ApiMessage {
1867            role: ApiRole::User,
1868            content: Some("Hello".to_string()),
1869            tool_calls: None,
1870            tool_call_id: None,
1871        }];
1872
1873        let request = ApiChatRequest {
1874            model: "glm-5",
1875            messages: &messages,
1876            max_completion_tokens: Some(1024),
1877            max_tokens: Some(1024),
1878            tools: None,
1879            reasoning: None,
1880        };
1881
1882        let json = serde_json::to_string(&request).unwrap();
1883        assert!(json.contains("\"max_completion_tokens\":1024"));
1884        assert!(json.contains("\"max_tokens\":1024"));
1885    }
1886
1887    #[test]
1888    fn test_streaming_request_serialization_openai_default() {
1889        let messages = vec![ApiMessage {
1890            role: ApiRole::User,
1891            content: Some("Hello".to_string()),
1892            tool_calls: None,
1893            tool_call_id: None,
1894        }];
1895
1896        let request = ApiChatRequestStreaming {
1897            model: "gpt-4o",
1898            messages: &messages,
1899            max_completion_tokens: Some(1024),
1900            max_tokens: None,
1901            tools: None,
1902            reasoning: None,
1903            stream: true,
1904        };
1905
1906        let json = serde_json::to_string(&request).unwrap();
1907        assert!(json.contains("\"stream\":true"));
1908        assert!(json.contains("\"model\":\"gpt-4o\""));
1909        assert!(json.contains("\"max_completion_tokens\":1024"));
1910        assert!(!json.contains("\"max_tokens\""));
1911    }
1912
1913    #[test]
1914    fn test_streaming_request_serialization_with_max_tokens_alias() {
1915        let messages = vec![ApiMessage {
1916            role: ApiRole::User,
1917            content: Some("Hello".to_string()),
1918            tool_calls: None,
1919            tool_call_id: None,
1920        }];
1921
1922        let request = ApiChatRequestStreaming {
1923            model: "kimi-k2-thinking",
1924            messages: &messages,
1925            max_completion_tokens: Some(1024),
1926            max_tokens: Some(1024),
1927            tools: None,
1928            reasoning: None,
1929            stream: true,
1930        };
1931
1932        let json = serde_json::to_string(&request).unwrap();
1933        assert!(json.contains("\"max_completion_tokens\":1024"));
1934        assert!(json.contains("\"max_tokens\":1024"));
1935    }
1936
1937    #[test]
1938    fn test_request_serialization_includes_reasoning_when_present() {
1939        let messages = vec![ApiMessage {
1940            role: ApiRole::User,
1941            content: Some("Hello".to_string()),
1942            tool_calls: None,
1943            tool_call_id: None,
1944        }];
1945
1946        let request = ApiChatRequest {
1947            model: MODEL_GPT54,
1948            messages: &messages,
1949            max_completion_tokens: Some(1024),
1950            max_tokens: None,
1951            tools: None,
1952            reasoning: Some(ApiReasoning {
1953                effort: ReasoningEffort::High,
1954            }),
1955        };
1956
1957        let json = serde_json::to_string(&request).unwrap();
1958        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
1959    }
1960}