Skip to main content

agent_sdk/providers/
openai.rs

//! `OpenAI` API provider implementation.
//!
//! This module provides an implementation of `LlmProvider` for the `OpenAI`
//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
//! via the `with_base_url` constructor.
//!
//! Legacy models that require the Responses API (like `gpt-5.2-codex`) are automatically
//! routed to the correct endpoint.
10use crate::llm::attachments::{request_has_attachments, validate_request_attachments};
11use crate::llm::{
12    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, LlmProvider, StopReason,
13    StreamBox, StreamDelta, ThinkingConfig, ThinkingMode, Usage,
14};
15use anyhow::Result;
16use async_trait::async_trait;
17use futures::StreamExt;
18use reqwest::StatusCode;
19use serde::de::Error as _;
20use serde::{Deserialize, Serialize};
21
22use super::openai_responses::OpenAIResponsesProvider;
23
/// Default API root: the official OpenAI endpoint (no trailing slash —
/// request paths are built as `{base_url}/chat/completions`).
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
25
/// Check if a model requires the Responses API instead of Chat Completions.
///
/// Currently only `gpt-5.2-codex` is Responses-only.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
30
/// True when `base_url` points at the official `OpenAI` API.
///
/// `DEFAULT_BASE_URL` (`https://api.openai.com/v1`) already contains
/// `api.openai.com`, so the old `base_url == DEFAULT_BASE_URL` comparison was
/// dead code subsumed by the substring check; only the latter remains.
fn is_official_openai_base_url(base_url: &str) -> bool {
    base_url.contains("api.openai.com")
}
34
35fn request_is_agentic(request: &ChatRequest) -> bool {
36    request
37        .tools
38        .as_ref()
39        .is_some_and(|tools| !tools.is_empty()) || request.messages.iter().any(|message| {
40        matches!(
41            &message.content,
42            Content::Blocks(blocks)
43                if blocks.iter().any(|block| {
44                    matches!(block, ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. })
45                })
46        )
47    })
48}
49
50fn should_use_responses_api(base_url: &str, model: &str, request: &ChatRequest) -> bool {
51    requires_responses_api(model)
52        || request_has_attachments(request)
53        || (is_official_openai_base_url(base_url) && request_is_agentic(request))
54}
55
// GPT-5.4 series
pub const MODEL_GPT54: &str = "gpt-5.4";

// GPT-5.3 Codex series
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

// GPT-5.2 series
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
// Responses-only model; routed there by `requires_responses_api`.
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

// GPT-5 series (400k context)
pub const MODEL_GPT5: &str = "gpt-5";
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

// o-series reasoning models
pub const MODEL_O3: &str = "o3";
pub const MODEL_O3_MINI: &str = "o3-mini";
pub const MODEL_O4_MINI: &str = "o4-mini";
pub const MODEL_O1: &str = "o1";
pub const MODEL_O1_MINI: &str = "o1-mini";

// GPT-4.1 series (improved instruction following, 1M context)
pub const MODEL_GPT41: &str = "gpt-4.1";
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

// GPT-4o series
pub const MODEL_GPT4O: &str = "gpt-4o";
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// OpenAI-compatible vendor defaults (see the `kimi`/`zai`/`minimax` constructors)
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
pub const MODEL_ZAI_GLM5: &str = "glm-5";
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
97
/// `OpenAI` LLM provider using the Chat Completions API.
///
/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
/// via the `with_base_url` constructor.
#[derive(Clone)]
pub struct OpenAIProvider {
    // Shared HTTP client, reused across all requests.
    client: reqwest::Client,
    // Bearer token; may be empty in gateway mode (see `apply_headers`).
    api_key: String,
    // Model identifier sent with every request.
    model: String,
    // API root, e.g. `https://api.openai.com/v1`; paths are appended with a
    // leading `/`, so a trailing slash here would produce `//` in URLs.
    base_url: String,
    // Provider-owned thinking configuration, if any.
    thinking: Option<ThinkingConfig>,
    /// Extra headers applied to every request (e.g. for gateway authentication).
    extra_headers: Vec<(String, String)>,
}
112
impl OpenAIProvider {
    /// Create a new `OpenAI` provider with the specified API key and model,
    /// targeting the official endpoint.
    #[must_use]
    pub fn new(api_key: String, model: String) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            model,
            base_url: DEFAULT_BASE_URL.to_owned(),
            thinking: None,
            extra_headers: Vec::new(),
        }
    }

    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
    ///
    /// Request paths are built as `{base_url}/chat/completions`, so `base_url`
    /// should not end with a trailing slash.
    #[must_use]
    pub fn with_base_url(api_key: String, model: String, base_url: String) -> Self {
        Self {
            client: reqwest::Client::new(),
            api_key,
            model,
            base_url,
            thinking: None,
            extra_headers: Vec::new(),
        }
    }

    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
    #[must_use]
    pub fn kimi(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
    }

    /// Create a provider using KIMI K2.5 (default KIMI model).
    #[must_use]
    pub fn kimi_k2_5(api_key: String) -> Self {
        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
    }

    /// Create a provider using KIMI K2 Thinking.
    #[must_use]
    pub fn kimi_k2_thinking(api_key: String) -> Self {
        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
    }

    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
    #[must_use]
    pub fn zai(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
    }

    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
    #[must_use]
    pub fn zai_glm5(api_key: String) -> Self {
        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
    }

    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
    #[must_use]
    pub fn minimax(api_key: String, model: String) -> Self {
        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
    }

    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
    #[must_use]
    pub fn minimax_m2_5(api_key: String) -> Self {
        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
    }

    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
    #[must_use]
    pub fn gpt52_instant(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
    }

    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
    #[must_use]
    pub fn gpt54(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT54.to_owned())
    }

    /// Create a provider using GPT-5.3 Codex (latest codex model).
    #[must_use]
    pub fn gpt53_codex(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
    }

    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
    #[must_use]
    pub fn gpt52_thinking(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
    }

    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
    #[must_use]
    pub fn gpt52_pro(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
    }

    /// Create a provider using the latest Codex model.
    ///
    /// Alias for [`Self::gpt53_codex`]; tracks whichever codex model is newest.
    #[must_use]
    pub fn codex(api_key: String) -> Self {
        Self::gpt53_codex(api_key)
    }

    /// Create a provider using GPT-5 (400k context, coding and reasoning).
    #[must_use]
    pub fn gpt5(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5.to_owned())
    }

    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
    #[must_use]
    pub fn gpt5_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
    }

    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
    #[must_use]
    pub fn gpt5_nano(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
    }

    /// Create a provider using o3 (most intelligent reasoning model).
    #[must_use]
    pub fn o3(api_key: String) -> Self {
        Self::new(api_key, MODEL_O3.to_owned())
    }

    /// Create a provider using o3-mini (smaller o3 variant).
    #[must_use]
    pub fn o3_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O3_MINI.to_owned())
    }

    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
    #[must_use]
    pub fn o4_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O4_MINI.to_owned())
    }

    /// Create a provider using o1 (reasoning model).
    #[must_use]
    pub fn o1(api_key: String) -> Self {
        Self::new(api_key, MODEL_O1.to_owned())
    }

    /// Create a provider using o1-mini (fast reasoning model).
    #[must_use]
    pub fn o1_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_O1_MINI.to_owned())
    }

    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
    #[must_use]
    pub fn gpt41(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT41.to_owned())
    }

    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
    #[must_use]
    pub fn gpt41_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
    }

    /// Create a provider using GPT-4o.
    #[must_use]
    pub fn gpt4o(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT4O.to_owned())
    }

    /// Create a provider using GPT-4o-mini (fast and cost-effective).
    #[must_use]
    pub fn gpt4o_mini(api_key: String) -> Self {
        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
    }

    /// Set the provider-owned thinking configuration for this model.
    #[must_use]
    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
        self.thinking = Some(thinking);
        self
    }

    /// Add extra HTTP headers applied to every request.
    ///
    /// Replaces any previously configured extra headers.
    #[must_use]
    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
        self.extra_headers = headers;
        self
    }

    /// Apply auth + extra headers. Skips `Authorization` when `api_key` is
    /// empty (BYOK gateway mode — auth handled via `extra_headers`).
    fn apply_headers(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
        let builder = if self.api_key.is_empty() {
            builder
        } else {
            builder.header("Authorization", format!("Bearer {}", self.api_key))
        };
        self.extra_headers
            .iter()
            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
    }
}
317
318#[async_trait]
319impl LlmProvider for OpenAIProvider {
320    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
321        // Route official OpenAI agentic flows to the Responses API.
322        if should_use_responses_api(&self.base_url, &self.model, &request) {
323            let mut responses_provider = OpenAIResponsesProvider::with_base_url(
324                self.api_key.clone(),
325                self.model.clone(),
326                self.base_url.clone(),
327            );
328            if let Some(thinking) = self.thinking.clone() {
329                responses_provider = responses_provider.with_thinking(thinking);
330            }
331            return responses_provider.chat(request).await;
332        }
333
334        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
335            Ok(thinking) => thinking,
336            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
337        };
338        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
339            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
340        }
341        let reasoning = build_api_reasoning(thinking_config.as_ref());
342        let messages = build_api_messages(&request);
343        let tools: Option<Vec<ApiTool>> = request
344            .tools
345            .map(|ts| ts.into_iter().map(convert_tool).collect());
346
347        let api_request = build_api_chat_request(
348            &self.model,
349            &messages,
350            request.max_tokens,
351            tools.as_deref(),
352            reasoning,
353            use_max_tokens_alias(&self.base_url),
354        );
355
356        log::debug!(
357            "OpenAI LLM request model={} max_tokens={}",
358            self.model,
359            request.max_tokens
360        );
361
362        let builder = self
363            .client
364            .post(format!("{}/chat/completions", self.base_url))
365            .header("Content-Type", "application/json");
366        let response = self
367            .apply_headers(builder)
368            .json(&api_request)
369            .send()
370            .await
371            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
372
373        let status = response.status();
374        let bytes = response
375            .bytes()
376            .await
377            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
378
379        log::debug!(
380            "OpenAI LLM response status={} body_len={}",
381            status,
382            bytes.len()
383        );
384
385        if status == StatusCode::TOO_MANY_REQUESTS {
386            return Ok(ChatOutcome::RateLimited);
387        }
388
389        if status.is_server_error() {
390            let body = String::from_utf8_lossy(&bytes);
391            log::error!("OpenAI server error status={status} body={body}");
392            return Ok(ChatOutcome::ServerError(body.into_owned()));
393        }
394
395        if status.is_client_error() {
396            let body = String::from_utf8_lossy(&bytes);
397            log::warn!("OpenAI client error status={status} body={body}");
398            return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
399        }
400
401        let api_response: ApiChatResponse = serde_json::from_slice(&bytes)
402            .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
403
404        let choice = api_response
405            .choices
406            .into_iter()
407            .next()
408            .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
409
410        let content = build_content_blocks(&choice.message);
411
412        let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
413
414        Ok(ChatOutcome::Success(ChatResponse {
415            id: api_response.id,
416            content,
417            model: api_response.model,
418            stop_reason,
419            usage: Usage {
420                input_tokens: api_response.usage.prompt_tokens,
421                output_tokens: api_response.usage.completion_tokens,
422                cached_input_tokens: api_response
423                    .usage
424                    .prompt_tokens_details
425                    .as_ref()
426                    .map_or(0, |details| details.cached_tokens),
427            },
428        }))
429    }
430
431    #[allow(clippy::too_many_lines)]
432    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
433        // Route official OpenAI agentic flows to the Responses API.
434        if should_use_responses_api(&self.base_url, &self.model, &request) {
435            let api_key = self.api_key.clone();
436            let model = self.model.clone();
437            let base_url = self.base_url.clone();
438            let thinking = self.thinking.clone();
439            return Box::pin(async_stream::stream! {
440                let mut responses_provider =
441                    OpenAIResponsesProvider::with_base_url(api_key, model, base_url);
442                if let Some(thinking) = thinking {
443                    responses_provider = responses_provider.with_thinking(thinking);
444                }
445                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
446                while let Some(item) = futures::StreamExt::next(&mut stream).await {
447                    yield item;
448                }
449            });
450        }
451
452        Box::pin(async_stream::stream! {
453            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
454                Ok(thinking) => thinking,
455                Err(error) => {
456                    yield Ok(StreamDelta::Error {
457                        message: error.to_string(),
458                        recoverable: false,
459                    });
460                    return;
461                }
462            };
463            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
464                yield Ok(StreamDelta::Error {
465                    message: error.to_string(),
466                    recoverable: false,
467                });
468                return;
469            }
470            let reasoning = build_api_reasoning(thinking_config.as_ref());
471            let messages = build_api_messages(&request);
472            let tools: Option<Vec<ApiTool>> = request
473                .tools
474                .map(|ts| ts.into_iter().map(convert_tool).collect());
475
476            let api_request = build_api_chat_request_streaming(
477                &self.model,
478                &messages,
479                request.max_tokens,
480                tools.as_deref(),
481                reasoning,
482                use_max_tokens_alias(&self.base_url),
483                use_stream_usage_options(&self.base_url),
484            );
485
486            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
487
488            let stream_builder = self.client
489                .post(format!("{}/chat/completions", self.base_url))
490                .header("Content-Type", "application/json");
491            let Ok(response) = self
492                .apply_headers(stream_builder)
493                .json(&api_request)
494                .send()
495                .await
496            else {
497                yield Err(anyhow::anyhow!("request failed"));
498                return;
499            };
500
501            let status = response.status();
502
503            if !status.is_success() {
504                let body = response.text().await.unwrap_or_default();
505                let (recoverable, level) = if status == StatusCode::TOO_MANY_REQUESTS {
506                    (true, "rate_limit")
507                } else if status.is_server_error() {
508                    (true, "server_error")
509                } else {
510                    (false, "client_error")
511                };
512                log::warn!("OpenAI error status={status} body={body} kind={level}");
513                yield Ok(StreamDelta::Error { message: body, recoverable });
514                return;
515            }
516
517            // Track tool call state across deltas
518            let mut tool_calls: std::collections::HashMap<usize, ToolCallAccumulator> =
519                std::collections::HashMap::new();
520            let mut usage: Option<Usage> = None;
521            let mut buffer = String::new();
522            let mut stream = response.bytes_stream();
523
524            while let Some(chunk_result) = stream.next().await {
525                let Ok(chunk) = chunk_result else {
526                    yield Err(anyhow::anyhow!("stream error: {}", chunk_result.unwrap_err()));
527                    return;
528                };
529                buffer.push_str(&String::from_utf8_lossy(&chunk));
530
531                while let Some(pos) = buffer.find('\n') {
532                    let line = buffer[..pos].trim().to_string();
533                    buffer = buffer[pos + 1..].to_string();
534                    if line.is_empty() { continue; }
535                    let Some(data) = line.strip_prefix("data: ") else { continue; };
536
537                    for result in process_sse_data(data) {
538                        match result {
539                            SseProcessResult::TextDelta(c) => yield Ok(StreamDelta::TextDelta { delta: c, block_index: 0 }),
540                            SseProcessResult::ToolCallUpdate { index, id, name, arguments } => apply_tool_call_update(&mut tool_calls, index, id, name, arguments),
541                            SseProcessResult::Usage(u) => usage = Some(u),
542                            SseProcessResult::Done(sr) => {
543                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
544                                return;
545                            }
546                            SseProcessResult::Sentinel => {
547                                let sr = if tool_calls.is_empty() { StopReason::EndTurn } else { StopReason::ToolUse };
548                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
549                                return;
550                            }
551                        }
552                    }
553                }
554            }
555
556            // Stream ended without [DONE] - emit what we have
557            for delta in build_stream_end_deltas(&tool_calls, usage, StopReason::EndTurn) {
558                yield Ok(delta);
559            }
560        })
561    }
562
563    fn model(&self) -> &str {
564        &self.model
565    }
566
567    fn provider(&self) -> &'static str {
568        "openai"
569    }
570
571    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
572        self.thinking.as_ref()
573    }
574}
575
576/// Apply a tool call update to the accumulator.
577fn apply_tool_call_update(
578    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
579    index: usize,
580    id: Option<String>,
581    name: Option<String>,
582    arguments: Option<String>,
583) {
584    let entry = tool_calls
585        .entry(index)
586        .or_insert_with(|| ToolCallAccumulator {
587            id: String::new(),
588            name: String::new(),
589            arguments: String::new(),
590        });
591    if let Some(id) = id {
592        entry.id = id;
593    }
594    if let Some(name) = name {
595        entry.name = name;
596    }
597    if let Some(args) = arguments {
598        entry.arguments.push_str(&args);
599    }
600}
601
602/// Helper to emit tool call deltas and done event.
603fn build_stream_end_deltas(
604    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
605    usage: Option<Usage>,
606    stop_reason: StopReason,
607) -> Vec<StreamDelta> {
608    let mut deltas = Vec::new();
609
610    // Emit tool calls
611    for (idx, tool) in tool_calls {
612        deltas.push(StreamDelta::ToolUseStart {
613            id: tool.id.clone(),
614            name: tool.name.clone(),
615            block_index: *idx + 1,
616            thought_signature: None,
617        });
618        deltas.push(StreamDelta::ToolInputDelta {
619            id: tool.id.clone(),
620            delta: tool.arguments.clone(),
621            block_index: *idx + 1,
622        });
623    }
624
625    // Emit usage
626    if let Some(u) = usage {
627        deltas.push(StreamDelta::Usage(u));
628    }
629
630    // Emit done
631    deltas.push(StreamDelta::Done {
632        stop_reason: Some(stop_reason),
633    });
634
635    deltas
636}
637
/// Result of processing an SSE chunk.
///
/// `process_sse_data` translates each `data:` payload into zero or more of
/// these, which the streaming loop then applies in order.
enum SseProcessResult {
    /// Emit a text delta.
    TextDelta(String),
    /// Update tool call accumulator (index, optional id, optional name, optional args).
    ToolCallUpdate {
        index: usize,
        id: Option<String>,
        name: Option<String>,
        arguments: Option<String>,
    },
    /// Usage information.
    Usage(Usage),
    /// Stream is done with a stop reason (explicit `finish_reason`).
    Done(StopReason),
    /// Stream sentinel [DONE] was received.
    Sentinel,
}
656
657/// Process an SSE data line and return results to apply.
658fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
659    if data == "[DONE]" {
660        return vec![SseProcessResult::Sentinel];
661    }
662
663    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
664        return vec![];
665    };
666
667    let mut results = Vec::new();
668
669    // Extract usage if present
670    if let Some(u) = chunk.usage {
671        results.push(SseProcessResult::Usage(Usage {
672            input_tokens: u.prompt_tokens,
673            output_tokens: u.completion_tokens,
674            cached_input_tokens: u
675                .prompt_tokens_details
676                .as_ref()
677                .map_or(0, |details| details.cached_tokens),
678        }));
679    }
680
681    // Process choices
682    if let Some(choice) = chunk.choices.into_iter().next() {
683        // Handle text content delta
684        if let Some(content) = choice.delta.content
685            && !content.is_empty()
686        {
687            results.push(SseProcessResult::TextDelta(content));
688        }
689
690        // Handle tool call deltas
691        if let Some(tc_deltas) = choice.delta.tool_calls {
692            for tc in tc_deltas {
693                results.push(SseProcessResult::ToolCallUpdate {
694                    index: tc.index,
695                    id: tc.id,
696                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
697                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
698                });
699            }
700        }
701
702        // Check for finish reason
703        if let Some(finish_reason) = choice.finish_reason {
704            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
705        }
706    }
707
708    results
709}
710
/// Whether the vendor behind `base_url` needs the legacy `max_tokens` field
/// alongside `max_completion_tokens` (Moonshot, z.ai, MiniMax).
fn use_max_tokens_alias(base_url: &str) -> bool {
    const VENDOR_DOMAINS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];
    VENDOR_DOMAINS.iter().any(|domain| base_url.contains(domain))
}
716
/// Whether to send `stream_options.include_usage` — only done for the
/// official `OpenAI` endpoint.
///
/// `DEFAULT_BASE_URL` already contains `api.openai.com`, so the old equality
/// comparison against it was dead code; the substring check alone suffices.
fn use_stream_usage_options(base_url: &str) -> bool {
    base_url.contains("api.openai.com")
}
720
721fn map_finish_reason(finish_reason: &str) -> StopReason {
722    match finish_reason {
723        "stop" => StopReason::EndTurn,
724        "tool_calls" => StopReason::ToolUse,
725        "length" => StopReason::MaxTokens,
726        "content_filter" | "network_error" => StopReason::StopSequence,
727        "sensitive" => StopReason::Refusal,
728        unknown => {
729            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
730            StopReason::StopSequence
731        }
732    }
733}
734
735fn build_api_chat_request<'a>(
736    model: &'a str,
737    messages: &'a [ApiMessage],
738    max_tokens: u32,
739    tools: Option<&'a [ApiTool]>,
740    reasoning: Option<ApiReasoning>,
741    include_max_tokens_alias: bool,
742) -> ApiChatRequest<'a> {
743    ApiChatRequest {
744        model,
745        messages,
746        max_completion_tokens: Some(max_tokens),
747        max_tokens: include_max_tokens_alias.then_some(max_tokens),
748        tools,
749        reasoning,
750    }
751}
752
753fn build_api_chat_request_streaming<'a>(
754    model: &'a str,
755    messages: &'a [ApiMessage],
756    max_tokens: u32,
757    tools: Option<&'a [ApiTool]>,
758    reasoning: Option<ApiReasoning>,
759    include_max_tokens_alias: bool,
760    include_stream_usage: bool,
761) -> ApiChatRequestStreaming<'a> {
762    ApiChatRequestStreaming {
763        model,
764        messages,
765        max_completion_tokens: Some(max_tokens),
766        max_tokens: include_max_tokens_alias.then_some(max_tokens),
767        tools,
768        reasoning,
769        stream_options: include_stream_usage.then_some(ApiStreamOptions {
770            include_usage: true,
771        }),
772        stream: true,
773    }
774}
775
776fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
777    thinking
778        .and_then(resolve_reasoning_effort)
779        .map(|effort| ApiReasoning { effort })
780}
781
782const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
783    if let Some(effort) = config.effort {
784        return Some(map_effort(effort));
785    }
786
787    match &config.mode {
788        ThinkingMode::Adaptive => None,
789        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
790    }
791}
792
/// Map the SDK `Effort` level onto the API's reasoning-effort scale.
/// The mapping is 1:1, with `Max` translated to `XHigh`.
const fn map_effort(effort: Effort) -> ReasoningEffort {
    match effort {
        Effort::Low => ReasoningEffort::Low,
        Effort::Medium => ReasoningEffort::Medium,
        Effort::High => ReasoningEffort::High,
        Effort::Max => ReasoningEffort::XHigh,
    }
}
801
802const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
803    if budget_tokens <= 4_096 {
804        ReasoningEffort::Low
805    } else if budget_tokens <= 16_384 {
806        ReasoningEffort::Medium
807    } else if budget_tokens <= 32_768 {
808        ReasoningEffort::High
809    } else {
810        ReasoningEffort::XHigh
811    }
812}
813
/// Convert an SDK [`ChatRequest`] into the flat message list expected by the
/// OpenAI Chat Completions API.
///
/// Mapping performed here:
/// - The SDK's separate `system` prompt becomes a leading `system`-role message.
/// - `ContentBlock::ToolUse` blocks become `tool_calls` on an assistant message.
/// - `ContentBlock::ToolResult` blocks become separate `tool`-role messages
///   carrying `tool_call_id`.
/// - Thinking / redacted-thinking / image / document blocks are dropped.
fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
    let mut messages = Vec::new();

    // Add system message first (OpenAI uses a separate message for system prompt)
    if !request.system.is_empty() {
        messages.push(ApiMessage {
            role: ApiRole::System,
            content: Some(request.system.clone()),
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Convert SDK messages to OpenAI format
    for msg in &request.messages {
        match &msg.content {
            // Plain-text message: role carries over 1:1.
            Content::Text(text) => {
                messages.push(ApiMessage {
                    role: match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    },
                    content: Some(text.clone()),
                    tool_calls: None,
                    tool_call_id: None,
                });
            }
            Content::Blocks(blocks) => {
                // Handle mixed content blocks
                let mut text_parts = Vec::new();
                let mut tool_calls = Vec::new();

                for block in blocks {
                    match block {
                        ContentBlock::Text { text } => {
                            text_parts.push(text.clone());
                        }
                        ContentBlock::Thinking { .. }
                        | ContentBlock::RedactedThinking { .. }
                        | ContentBlock::Image { .. }
                        | ContentBlock::Document { .. } => {
                            // These blocks are not sent to the OpenAI API
                        }
                        ContentBlock::ToolUse {
                            id, name, input, ..
                        } => {
                            // `arguments` must be a JSON-encoded *string*;
                            // serialization failure degrades to "{}" rather
                            // than aborting the whole request.
                            tool_calls.push(ApiToolCall {
                                id: id.clone(),
                                r#type: "function".to_owned(),
                                function: ApiFunctionCall {
                                    name: name.clone(),
                                    arguments: serde_json::to_string(input)
                                        .unwrap_or_else(|_| "{}".to_owned()),
                                },
                            });
                        }
                        ContentBlock::ToolResult {
                            tool_use_id,
                            content,
                            ..
                        } => {
                            // Tool results are separate messages in OpenAI.
                            // NOTE(review): these are pushed immediately, so
                            // they land *before* the aggregated text message
                            // built below. This assumes an SDK message never
                            // mixes ToolResult blocks with text that must
                            // precede them — TODO confirm with callers.
                            messages.push(ApiMessage {
                                role: ApiRole::Tool,
                                content: Some(content.clone()),
                                tool_calls: None,
                                tool_call_id: Some(tool_use_id.clone()),
                            });
                        }
                    }
                }

                // Add assistant message with text and/or tool calls
                if !text_parts.is_empty() || !tool_calls.is_empty() {
                    let role = match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    };

                    // Only add if it's an assistant message or has text content.
                    // A user message containing only tool_calls (an invalid
                    // combination for this API) is silently dropped here.
                    if role == ApiRole::Assistant || !text_parts.is_empty() {
                        messages.push(ApiMessage {
                            role,
                            content: if text_parts.is_empty() {
                                None
                            } else {
                                Some(text_parts.join("\n"))
                            },
                            tool_calls: if tool_calls.is_empty() {
                                None
                            } else {
                                Some(tool_calls)
                            },
                            tool_call_id: None,
                        });
                    }
                }
            }
        }
    }

    messages
}
917
918fn convert_tool(t: crate::llm::Tool) -> ApiTool {
919    ApiTool {
920        r#type: "function".to_owned(),
921        function: ApiFunction {
922            name: t.name,
923            description: t.description,
924            parameters: t.input_schema,
925        },
926    }
927}
928
929fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
930    let mut blocks = Vec::new();
931
932    // Add text content if present
933    if let Some(content) = &message.content
934        && !content.is_empty()
935    {
936        blocks.push(ContentBlock::Text {
937            text: content.clone(),
938        });
939    }
940
941    // Add tool calls if present
942    if let Some(tool_calls) = &message.tool_calls {
943        for tc in tool_calls {
944            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
945                .unwrap_or_else(|_| serde_json::json!({}));
946            blocks.push(ContentBlock::ToolUse {
947                id: tc.id.clone(),
948                name: tc.function.name.clone(),
949                input,
950                thought_signature: None,
951            });
952        }
953    }
954
955    blocks
956}
957
958// ============================================================================
959// API Request Types
960// ============================================================================
961
/// Request body for a non-streaming `POST /chat/completions` call.
///
/// Borrows the prepared messages/tools from the caller so building the
/// request performs no extra clones.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    /// Output-token limit, newer parameter name.
    /// NOTE(review): which of the two limit fields gets populated appears to
    /// be decided at the call site (not visible here) — confirm there.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Output-token limit, legacy parameter name kept for
    /// OpenAI-compatible servers.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    /// Function tools offered to the model, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Reasoning-effort hint; omitted for models without thinking support.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
}
975
/// Request body for a streaming `POST /chat/completions` call.
///
/// Identical to [`ApiChatRequest`] plus the `stream` flag and
/// `stream_options`; kept as a separate struct so the non-streaming
/// request never serializes streaming-only fields.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    /// Output-token limit, newer parameter name.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Output-token limit, legacy parameter name for compatible servers.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Extra streaming options (e.g. requesting a final usage chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<ApiStreamOptions>,
    /// Always serialized; `true` selects SSE streaming on the server.
    stream: bool,
}
992
/// Streaming-only request options.
#[derive(Clone, Copy, Serialize)]
struct ApiStreamOptions {
    /// When `true`, the server appends a final SSE chunk containing usage.
    include_usage: bool,
}
997
/// Reasoning-effort levels accepted by the API, serialized as lowercase
/// strings (`"low"`, `"medium"`, `"high"`, `"xhigh"`).
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    // Explicit rename is redundant under `rename_all = "lowercase"` but
    // documents the exact wire value.
    #[serde(rename = "xhigh")]
    XHigh,
}
1007
/// Wrapper for the request's `reasoning` object.
#[derive(Serialize)]
struct ApiReasoning {
    effort: ReasoningEffort,
}
1012
/// One message in the Chat Completions request payload.
#[derive(Serialize)]
struct ApiMessage {
    role: ApiRole,
    /// Message text; `None` for assistant messages that carry only tool calls.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Tool invocations; only populated on assistant messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    /// Links a `tool`-role message back to the call it answers.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}
1023
/// Chat Completions message roles, serialized lowercase
/// (`"system"`, `"user"`, `"assistant"`, `"tool"`).
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    System,
    User,
    Assistant,
    Tool,
}
1032
/// A tool invocation attached to an assistant request message.
#[derive(Serialize)]
struct ApiToolCall {
    id: String,
    /// Always `"function"` — the only tool-call type built by this module.
    r#type: String,
    function: ApiFunctionCall,
}
1039
/// Function name plus arguments for a tool call.
#[derive(Serialize)]
struct ApiFunctionCall {
    name: String,
    /// JSON-*encoded* argument object (a string on the wire, not an object).
    arguments: String,
}
1045
/// A tool definition offered to the model.
#[derive(Serialize)]
struct ApiTool {
    /// Always `"function"` (see `convert_tool`).
    r#type: String,
    function: ApiFunction,
}
1051
/// Function metadata for a tool definition.
#[derive(Serialize)]
struct ApiFunction {
    name: String,
    description: String,
    /// JSON Schema describing the function's parameters.
    parameters: serde_json::Value,
}
1058
1059// ============================================================================
1060// API Response Types
1061// ============================================================================
1062
/// Top-level body of a non-streaming Chat Completions response.
#[derive(Deserialize)]
struct ApiChatResponse {
    id: String,
    /// Completion choices; this module reads the first one.
    choices: Vec<ApiChoice>,
    /// Model name as reported by the server (may differ from the request).
    model: String,
    usage: ApiUsage,
}
1070
/// One completion choice from the response.
#[derive(Deserialize)]
struct ApiChoice {
    message: ApiResponseMessage,
    /// Raw finish reason string; `Option` + free-form `String` tolerate
    /// vendor-specific values from OpenAI-compatible servers.
    finish_reason: Option<String>,
}
1076
/// Assistant message inside a response choice.
#[derive(Deserialize)]
struct ApiResponseMessage {
    /// Text content; may be `null` when only tool calls are returned.
    content: Option<String>,
    tool_calls: Option<Vec<ApiResponseToolCall>>,
}
1082
/// A tool call returned by the model (the `type` tag is ignored on parse).
#[derive(Deserialize)]
struct ApiResponseToolCall {
    id: String,
    function: ApiResponseFunctionCall,
}
1088
/// Function name plus JSON-encoded arguments from a returned tool call.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    name: String,
    /// JSON-encoded argument string; parsed later in `build_content_blocks`.
    arguments: String,
}
1094
/// Token accounting for a non-streaming response.
///
/// Counts go through `deserialize_u32_from_number`, which also accepts
/// float-typed numbers — presumably because some OpenAI-compatible servers
/// emit e.g. `100.0` (TODO confirm which vendors require this).
#[derive(Deserialize)]
struct ApiUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    /// Optional breakdown (e.g. cached prompt tokens); absent on many servers.
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1104
/// Prompt-token breakdown; `cached_tokens` defaults to 0 when the field is
/// missing.
#[derive(Deserialize)]
struct ApiPromptTokensDetails {
    #[serde(default, deserialize_with = "deserialize_u32_from_number")]
    cached_tokens: u32,
}
1110
1111// ============================================================================
1112// SSE Streaming Types
1113// ============================================================================
1114
1115/// Accumulator for tool call state across stream deltas.
/// Accumulator for tool call state across stream deltas.
///
/// OpenAI streams a tool call as a first delta carrying `id`/`name` followed
/// by argument fragments; this collects them until the call is complete.
struct ToolCallAccumulator {
    id: String,
    name: String,
    /// Concatenated JSON argument fragments, in arrival order.
    arguments: String,
}
1121
1122/// A single chunk in `OpenAI`'s SSE stream.
/// A single chunk in `OpenAI`'s SSE stream.
#[derive(Deserialize)]
struct SseChunk {
    choices: Vec<SseChoice>,
    /// Present only on the final chunk when `stream_options.include_usage`
    /// was requested.
    #[serde(default)]
    usage: Option<SseUsage>,
}
1129
/// One choice inside an SSE chunk; `finish_reason` is set on the last delta.
#[derive(Deserialize)]
struct SseChoice {
    delta: SseDelta,
    finish_reason: Option<String>,
}
1135
/// Incremental payload of an SSE choice: a text fragment and/or tool-call
/// fragments.
#[derive(Deserialize)]
struct SseDelta {
    content: Option<String>,
    tool_calls: Option<Vec<SseToolCallDelta>>,
}
1141
/// A fragment of a streamed tool call.
#[derive(Deserialize)]
struct SseToolCallDelta {
    /// Index identifying which tool call this fragment belongs to.
    index: usize,
    /// Set on the first fragment of a call; absent on continuations.
    id: Option<String>,
    function: Option<SseFunctionDelta>,
}
1148
/// Function-call fragment: `name` arrives once, `arguments` in pieces.
#[derive(Deserialize)]
struct SseFunctionDelta {
    name: Option<String>,
    arguments: Option<String>,
}
1154
/// Token accounting delivered on the final SSE chunk; mirrors [`ApiUsage`],
/// including the float-tolerant number parsing.
#[derive(Deserialize)]
struct SseUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1164
1165fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1166where
1167    D: serde::Deserializer<'de>,
1168{
1169    #[derive(Deserialize)]
1170    #[serde(untagged)]
1171    enum NumberLike {
1172        U64(u64),
1173        F64(f64),
1174    }
1175
1176    match NumberLike::deserialize(deserializer)? {
1177        NumberLike::U64(v) => u32::try_from(v)
1178            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1179        NumberLike::F64(v) => {
1180            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1181                v.to_string().parse::<u32>().map_err(|e| {
1182                    D::Error::custom(format!(
1183                        "failed to convert integer-compatible token count {v} to u32: {e}"
1184                    ))
1185                })
1186            } else {
1187                Err(D::Error::custom(format!(
1188                    "token count must be a non-negative integer-compatible number, got {v}"
1189                )))
1190            }
1191        }
1192    }
1193}
1194
1195#[cfg(test)]
1196mod tests {
1197    use super::*;
1198
1199    // ===================
1200    // Constructor Tests
1201    // ===================
1202
    // `new` keeps the default (official) OpenAI base URL.
    #[test]
    fn test_new_creates_provider_with_custom_model() {
        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());

        assert_eq!(provider.model(), "custom-model");
        assert_eq!(provider.provider(), "openai");
        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
    }

    // `with_base_url` is the escape hatch for OpenAI-compatible servers
    // (Ollama, vLLM, ...).
    #[test]
    fn test_with_base_url_creates_provider_with_custom_url() {
        let provider = OpenAIProvider::with_base_url(
            "test-api-key".to_string(),
            "llama3".to_string(),
            "http://localhost:11434/v1".to_string(),
        );

        assert_eq!(provider.model(), "llama3");
        assert_eq!(provider.base_url, "http://localhost:11434/v1");
    }

    // Each factory below pins the model constant it is expected to select.
    #[test]
    fn test_gpt4o_factory_creates_gpt4o_provider() {
        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT4O);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt52_thinking_factory_creates_provider() {
        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt54_factory_creates_provider() {
        let provider = OpenAIProvider::gpt54("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT54);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt53_codex_factory_creates_provider() {
        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
        assert_eq!(provider.provider(), "openai");
    }

    // The generic `codex` alias must track the newest codex model.
    #[test]
    fn test_codex_factory_points_to_latest_codex_model() {
        let provider = OpenAIProvider::codex("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt5_factory_creates_gpt5_provider() {
        let provider = OpenAIProvider::gpt5("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT5);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt5_mini_factory_creates_provider() {
        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT5_MINI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_o3_factory_creates_o3_provider() {
        let provider = OpenAIProvider::o3("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_O3);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_o4_mini_factory_creates_o4_mini_provider() {
        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_O4_MINI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_o1_factory_creates_o1_provider() {
        let provider = OpenAIProvider::o1("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_O1);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_gpt41_factory_creates_gpt41_provider() {
        let provider = OpenAIProvider::gpt41("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GPT41);
        assert_eq!(provider.provider(), "openai");
    }

    // Vendor factories must also switch the base URL to the vendor endpoint.
    #[test]
    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());

        assert_eq!(provider.model(), "kimi-custom");
        assert_eq!(provider.base_url, BASE_URL_KIMI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_kimi_k2_5_factory_creates_provider() {
        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
        assert_eq!(provider.base_url, BASE_URL_KIMI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_kimi_k2_thinking_factory_creates_provider() {
        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
        assert_eq!(provider.base_url, BASE_URL_KIMI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_zai_factory_creates_provider_with_zai_base_url() {
        let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());

        assert_eq!(provider.model(), "glm-custom");
        assert_eq!(provider.base_url, BASE_URL_ZAI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_zai_glm5_factory_creates_provider() {
        let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_ZAI_GLM5);
        assert_eq!(provider.base_url, BASE_URL_ZAI);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_minimax_factory_creates_provider_with_minimax_base_url() {
        let provider =
            OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());

        assert_eq!(provider.model(), "minimax-custom");
        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
        assert_eq!(provider.provider(), "openai");
    }

    #[test]
    fn test_minimax_m2_5_factory_creates_provider() {
        let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
        assert_eq!(provider.provider(), "openai");
    }
1383
1384    // ===================
1385    // Model Constants Tests
1386    // ===================
1387
    // Guards the public model/base-URL constants against accidental edits —
    // these strings are part of the crate's observable behavior.
    #[test]
    fn test_model_constants_have_expected_values() {
        // GPT-5.4 / GPT-5.3 Codex
        assert_eq!(MODEL_GPT54, "gpt-5.4");
        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
        // GPT-5.2 series
        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
        // GPT-5 series
        assert_eq!(MODEL_GPT5, "gpt-5");
        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
        // o-series
        assert_eq!(MODEL_O3, "o3");
        assert_eq!(MODEL_O3_MINI, "o3-mini");
        assert_eq!(MODEL_O4_MINI, "o4-mini");
        assert_eq!(MODEL_O1, "o1");
        assert_eq!(MODEL_O1_MINI, "o1-mini");
        // GPT-4.1 series
        assert_eq!(MODEL_GPT41, "gpt-4.1");
        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
        // GPT-4o series
        assert_eq!(MODEL_GPT4O, "gpt-4o");
        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
        // OpenAI-compatible vendor defaults
        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
    }
1424
1425    // ===================
1426    // Clone Tests
1427    // ===================
1428
    // Cloning must preserve model, provider name, and base URL.
    #[test]
    fn test_provider_is_cloneable() {
        let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
        let cloned = provider.clone();

        assert_eq!(provider.model(), cloned.model());
        assert_eq!(provider.provider(), cloned.provider());
        assert_eq!(provider.base_url, cloned.base_url);
    }
1438
1439    // ===================
1440    // API Type Serialization Tests
1441    // ===================
1442
    // Roles must serialize to the exact lowercase strings the API expects.
    #[test]
    fn test_api_role_serialization() {
        let system_role = ApiRole::System;
        let user_role = ApiRole::User;
        let assistant_role = ApiRole::Assistant;
        let tool_role = ApiRole::Tool;

        assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
        assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
        assert_eq!(
            serde_json::to_string(&assistant_role).unwrap(),
            "\"assistant\""
        );
        assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
    }

    // `None` optional fields must be omitted entirely, not sent as null.
    #[test]
    fn test_api_message_serialization_simple() {
        let message = ApiMessage {
            role: ApiRole::User,
            content: Some("Hello, world!".to_string()),
            tool_calls: None,
            tool_call_id: None,
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"user\""));
        assert!(json.contains("\"content\":\"Hello, world!\""));
        // Optional fields should be omitted
        assert!(!json.contains("tool_calls"));
        assert!(!json.contains("tool_call_id"));
    }

    // Assistant messages carrying tool calls keep the "function" type tag
    // and the raw JSON-string arguments.
    #[test]
    fn test_api_message_serialization_with_tool_calls() {
        let message = ApiMessage {
            role: ApiRole::Assistant,
            content: Some("Let me help.".to_string()),
            tool_calls: Some(vec![ApiToolCall {
                id: "call_123".to_string(),
                r#type: "function".to_string(),
                function: ApiFunctionCall {
                    name: "read_file".to_string(),
                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
                },
            }]),
            tool_call_id: None,
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"assistant\""));
        assert!(json.contains("\"tool_calls\""));
        assert!(json.contains("\"id\":\"call_123\""));
        assert!(json.contains("\"type\":\"function\""));
        assert!(json.contains("\"name\":\"read_file\""));
    }

    // Tool-result messages must carry `tool_call_id` linking to the call.
    #[test]
    fn test_api_tool_message_serialization() {
        let message = ApiMessage {
            role: ApiRole::Tool,
            content: Some("File contents here".to_string()),
            tool_calls: None,
            tool_call_id: Some("call_123".to_string()),
        };

        let json = serde_json::to_string(&message).unwrap();
        assert!(json.contains("\"role\":\"tool\""));
        assert!(json.contains("\"tool_call_id\":\"call_123\""));
        assert!(json.contains("\"content\":\"File contents here\""));
    }

    // Tool definitions serialize name, description, and the schema object.
    #[test]
    fn test_api_tool_serialization() {
        let tool = ApiTool {
            r#type: "function".to_string(),
            function: ApiFunction {
                name: "test_tool".to_string(),
                description: "A test tool".to_string(),
                parameters: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "arg": {"type": "string"}
                    }
                }),
            },
        };

        let json = serde_json::to_string(&tool).unwrap();
        assert!(json.contains("\"type\":\"function\""));
        assert!(json.contains("\"name\":\"test_tool\""));
        assert!(json.contains("\"description\":\"A test tool\""));
        assert!(json.contains("\"parameters\""));
    }
1537
1538    // ===================
1539    // API Type Deserialization Tests
1540    // ===================
1541
    // A minimal well-formed response body parses into the expected fields.
    #[test]
    fn test_api_response_deserialization() {
        let json = r#"{
            "id": "chatcmpl-123",
            "choices": [
                {
                    "message": {
                        "content": "Hello!"
                    },
                    "finish_reason": "stop"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(response.id, "chatcmpl-123");
        assert_eq!(response.model, "gpt-4o");
        assert_eq!(response.usage.prompt_tokens, 100);
        assert_eq!(response.usage.completion_tokens, 50);
        assert_eq!(response.choices.len(), 1);
        assert_eq!(
            response.choices[0].message.content,
            Some("Hello!".to_string())
        );
    }

    // Tool-call responses: `content` may be null while tool_calls is set.
    #[test]
    fn test_api_response_with_tool_calls_deserialization() {
        let json = r#"{
            "id": "chatcmpl-456",
            "choices": [
                {
                    "message": {
                        "content": null,
                        "tool_calls": [
                            {
                                "id": "call_abc",
                                "type": "function",
                                "function": {
                                    "name": "read_file",
                                    "arguments": "{\"path\": \"test.txt\"}"
                                }
                            }
                        ]
                    },
                    "finish_reason": "tool_calls"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 150,
                "completion_tokens": 30
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].id, "call_abc");
        assert_eq!(tool_calls[0].function.name, "read_file");
    }

    // Vendor-specific finish reasons must parse (free-form String) and map
    // to the StopSequence fallback.
    #[test]
    fn test_api_response_with_unknown_finish_reason_deserialization() {
        let json = r#"{
            "id": "chatcmpl-789",
            "choices": [
                {
                    "message": {
                        "content": "ok"
                    },
                    "finish_reason": "vendor_custom_reason"
                }
            ],
            "model": "glm-5",
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": 5
            }
        }"#;

        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(
            response.choices[0].finish_reason.as_deref(),
            Some("vendor_custom_reason")
        );
        assert_eq!(
            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
            StopReason::StopSequence
        );
    }

    // Pins the finish-reason mapping table, including the catch-all branch.
    #[test]
    fn test_map_finish_reason_covers_vendor_specific_values() {
        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
        assert_eq!(
            map_finish_reason("content_filter"),
            StopReason::StopSequence
        );
        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
        assert_eq!(
            map_finish_reason("some_new_reason"),
            StopReason::StopSequence
        );
    }
1655
1656    // ===================
1657    // Message Conversion Tests
1658    // ===================
1659
    // A non-empty system prompt becomes a leading system-role message.
    #[test]
    fn test_build_api_messages_with_system() {
        let request = ChatRequest {
            system: "You are helpful.".to_string(),
            messages: vec![crate::llm::Message::user("Hello")],
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: true,
            session_id: None,
            cached_content: None,
            thinking: None,
        };

        let api_messages = build_api_messages(&request);
        assert_eq!(api_messages.len(), 2);
        assert_eq!(api_messages[0].role, ApiRole::System);
        assert_eq!(
            api_messages[0].content,
            Some("You are helpful.".to_string())
        );
        assert_eq!(api_messages[1].role, ApiRole::User);
        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
    }

    // An empty system prompt must not produce a system message at all.
    #[test]
    fn test_build_api_messages_empty_system() {
        let request = ChatRequest {
            system: String::new(),
            messages: vec![crate::llm::Message::user("Hello")],
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: true,
            session_id: None,
            cached_content: None,
            thinking: None,
        };

        let api_messages = build_api_messages(&request);
        assert_eq!(api_messages.len(), 1);
        assert_eq!(api_messages[0].role, ApiRole::User);
    }

    // SDK tool definitions map onto the "function" wrapper.
    #[test]
    fn test_convert_tool() {
        let tool = crate::llm::Tool {
            name: "test_tool".to_string(),
            description: "A test tool".to_string(),
            input_schema: serde_json::json!({"type": "object"}),
        };

        let api_tool = convert_tool(tool);
        assert_eq!(api_tool.r#type, "function");
        assert_eq!(api_tool.function.name, "test_tool");
        assert_eq!(api_tool.function.description, "A test tool");
    }

    // Text-only response yields a single Text block.
    #[test]
    fn test_build_content_blocks_text_only() {
        let message = ApiResponseMessage {
            content: Some("Hello!".to_string()),
            tool_calls: None,
        };

        let blocks = build_content_blocks(&message);
        assert_eq!(blocks.len(), 1);
        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
    }

    // Text + tool calls yield Text first, then ToolUse blocks in order.
    #[test]
    fn test_build_content_blocks_with_tool_calls() {
        let message = ApiResponseMessage {
            content: Some("Let me help.".to_string()),
            tool_calls: Some(vec![ApiResponseToolCall {
                id: "call_123".to_string(),
                function: ApiResponseFunctionCall {
                    name: "read_file".to_string(),
                    arguments: "{\"path\": \"test.txt\"}".to_string(),
                },
            }]),
        };

        let blocks = build_content_blocks(&message);
        assert_eq!(blocks.len(), 2);
        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
        assert!(
            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
        );
    }
1748
1749    // ===================
1750    // SSE Streaming Type Tests
1751    // ===================
1752
1753    #[test]
1754    fn test_sse_chunk_text_delta_deserialization() {
1755        let json = r#"{
1756            "choices": [{
1757                "delta": {
1758                    "content": "Hello"
1759                },
1760                "finish_reason": null
1761            }]
1762        }"#;
1763
1764        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1765        assert_eq!(chunk.choices.len(), 1);
1766        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
1767        assert!(chunk.choices[0].finish_reason.is_none());
1768    }
1769
1770    #[test]
1771    fn test_sse_chunk_tool_call_delta_deserialization() {
1772        let json = r#"{
1773            "choices": [{
1774                "delta": {
1775                    "tool_calls": [{
1776                        "index": 0,
1777                        "id": "call_abc",
1778                        "function": {
1779                            "name": "read_file",
1780                            "arguments": ""
1781                        }
1782                    }]
1783                },
1784                "finish_reason": null
1785            }]
1786        }"#;
1787
1788        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1789        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1790        assert_eq!(tool_calls.len(), 1);
1791        assert_eq!(tool_calls[0].index, 0);
1792        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
1793        assert_eq!(
1794            tool_calls[0].function.as_ref().unwrap().name,
1795            Some("read_file".to_string())
1796        );
1797    }
1798
1799    #[test]
1800    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
1801        let json = r#"{
1802            "choices": [{
1803                "delta": {
1804                    "tool_calls": [{
1805                        "index": 0,
1806                        "function": {
1807                            "arguments": "{\"path\":"
1808                        }
1809                    }]
1810                },
1811                "finish_reason": null
1812            }]
1813        }"#;
1814
1815        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1816        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1817        assert_eq!(tool_calls[0].id, None);
1818        assert_eq!(
1819            tool_calls[0].function.as_ref().unwrap().arguments,
1820            Some("{\"path\":".to_string())
1821        );
1822    }
1823
1824    #[test]
1825    fn test_sse_chunk_with_finish_reason_deserialization() {
1826        let json = r#"{
1827            "choices": [{
1828                "delta": {},
1829                "finish_reason": "stop"
1830            }]
1831        }"#;
1832
1833        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1834        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
1835    }
1836
1837    #[test]
1838    fn test_sse_chunk_with_usage_deserialization() {
1839        let json = r#"{
1840            "choices": [{
1841                "delta": {},
1842                "finish_reason": "stop"
1843            }],
1844            "usage": {
1845                "prompt_tokens": 100,
1846                "completion_tokens": 50
1847            }
1848        }"#;
1849
1850        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1851        let usage = chunk.usage.unwrap();
1852        assert_eq!(usage.prompt_tokens, 100);
1853        assert_eq!(usage.completion_tokens, 50);
1854    }
1855
1856    #[test]
1857    fn test_sse_chunk_with_float_usage_deserialization() {
1858        let json = r#"{
1859            "choices": [{
1860                "delta": {},
1861                "finish_reason": "stop"
1862            }],
1863            "usage": {
1864                "prompt_tokens": 100.0,
1865                "completion_tokens": 50.0
1866            }
1867        }"#;
1868
1869        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1870        let usage = chunk.usage.unwrap();
1871        assert_eq!(usage.prompt_tokens, 100);
1872        assert_eq!(usage.completion_tokens, 50);
1873    }
1874
1875    #[test]
1876    fn test_api_usage_deserializes_integer_compatible_numbers() {
1877        let json = r#"{
1878            "prompt_tokens": 42.0,
1879            "completion_tokens": 7
1880        }"#;
1881
1882        let usage: ApiUsage = serde_json::from_str(json).unwrap();
1883        assert_eq!(usage.prompt_tokens, 42);
1884        assert_eq!(usage.completion_tokens, 7);
1885    }
1886
1887    #[test]
1888    fn test_api_usage_deserializes_cached_tokens() {
1889        let json = r#"{
1890            "prompt_tokens": 42,
1891            "completion_tokens": 7,
1892            "prompt_tokens_details": {
1893                "cached_tokens": 10
1894            }
1895        }"#;
1896
1897        let usage: ApiUsage = serde_json::from_str(json).unwrap();
1898        assert_eq!(usage.prompt_tokens, 42);
1899        assert_eq!(usage.completion_tokens, 7);
1900        assert_eq!(usage.prompt_tokens_details.unwrap().cached_tokens, 10);
1901    }
1902
1903    #[test]
1904    fn test_api_usage_rejects_fractional_numbers() {
1905        let json = r#"{
1906            "prompt_tokens": 42.5,
1907            "completion_tokens": 7
1908        }"#;
1909
1910        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
1911        assert!(usage.is_err());
1912    }
1913
1914    #[test]
1915    fn test_use_max_tokens_alias_for_vendor_urls() {
1916        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
1917        assert!(use_max_tokens_alias(BASE_URL_KIMI));
1918        assert!(use_max_tokens_alias(BASE_URL_ZAI));
1919        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
1920    }
1921
1922    #[test]
1923    fn test_requires_responses_api_only_for_legacy_codex_model() {
1924        assert!(requires_responses_api(MODEL_GPT52_CODEX));
1925        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
1926        assert!(!requires_responses_api(MODEL_GPT54));
1927    }
1928
1929    #[test]
1930    fn test_should_use_responses_api_for_official_agentic_requests() {
1931        let request = ChatRequest {
1932            system: String::new(),
1933            messages: vec![crate::llm::Message::user("Hello")],
1934            tools: Some(vec![crate::llm::Tool {
1935                name: "read_file".to_string(),
1936                description: "Read a file".to_string(),
1937                input_schema: serde_json::json!({"type": "object"}),
1938            }]),
1939            max_tokens: 1024,
1940            max_tokens_explicit: true,
1941            session_id: Some("thread-1".to_string()),
1942            cached_content: None,
1943            thinking: None,
1944        };
1945
1946        assert!(should_use_responses_api(
1947            DEFAULT_BASE_URL,
1948            MODEL_GPT54,
1949            &request
1950        ));
1951        assert!(!should_use_responses_api(
1952            BASE_URL_KIMI,
1953            MODEL_GPT54,
1954            &request
1955        ));
1956    }
1957
1958    #[test]
1959    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
1960        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
1961        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
1962    }
1963
1964    #[test]
1965    fn test_build_api_reasoning_uses_explicit_effort() {
1966        let reasoning =
1967            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
1968        assert!(matches!(reasoning.effort, ReasoningEffort::High));
1969    }
1970
1971    #[test]
1972    fn test_build_api_reasoning_omits_adaptive_without_effort() {
1973        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
1974    }
1975
1976    #[test]
1977    fn test_openai_rejects_adaptive_thinking() {
1978        let provider = OpenAIProvider::gpt54("test-key".to_string());
1979        let error = provider
1980            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
1981            .unwrap_err();
1982        assert!(
1983            error
1984                .to_string()
1985                .contains("adaptive thinking is not supported")
1986        );
1987    }
1988
1989    #[test]
1990    fn test_openai_non_reasoning_models_reject_thinking() {
1991        let provider = OpenAIProvider::gpt4o("test-key".to_string());
1992        let error = provider
1993            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
1994            .unwrap_err();
1995        assert!(error.to_string().contains("thinking is not supported"));
1996    }
1997
1998    #[test]
1999    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
2000        let messages = vec![ApiMessage {
2001            role: ApiRole::User,
2002            content: Some("Hello".to_string()),
2003            tool_calls: None,
2004            tool_call_id: None,
2005        }];
2006
2007        let request = ApiChatRequest {
2008            model: "gpt-4o",
2009            messages: &messages,
2010            max_completion_tokens: Some(1024),
2011            max_tokens: None,
2012            tools: None,
2013            reasoning: None,
2014        };
2015
2016        let json = serde_json::to_string(&request).unwrap();
2017        assert!(json.contains("\"max_completion_tokens\":1024"));
2018        assert!(!json.contains("\"max_tokens\""));
2019    }
2020
2021    #[test]
2022    fn test_request_serialization_with_max_tokens_alias() {
2023        let messages = vec![ApiMessage {
2024            role: ApiRole::User,
2025            content: Some("Hello".to_string()),
2026            tool_calls: None,
2027            tool_call_id: None,
2028        }];
2029
2030        let request = ApiChatRequest {
2031            model: "glm-5",
2032            messages: &messages,
2033            max_completion_tokens: Some(1024),
2034            max_tokens: Some(1024),
2035            tools: None,
2036            reasoning: None,
2037        };
2038
2039        let json = serde_json::to_string(&request).unwrap();
2040        assert!(json.contains("\"max_completion_tokens\":1024"));
2041        assert!(json.contains("\"max_tokens\":1024"));
2042    }
2043
2044    #[test]
2045    fn test_streaming_request_serialization_openai_default() {
2046        let messages = vec![ApiMessage {
2047            role: ApiRole::User,
2048            content: Some("Hello".to_string()),
2049            tool_calls: None,
2050            tool_call_id: None,
2051        }];
2052
2053        let request = ApiChatRequestStreaming {
2054            model: "gpt-4o",
2055            messages: &messages,
2056            max_completion_tokens: Some(1024),
2057            max_tokens: None,
2058            tools: None,
2059            reasoning: None,
2060            stream_options: Some(ApiStreamOptions {
2061                include_usage: true,
2062            }),
2063            stream: true,
2064        };
2065
2066        let json = serde_json::to_string(&request).unwrap();
2067        assert!(json.contains("\"stream\":true"));
2068        assert!(json.contains("\"model\":\"gpt-4o\""));
2069        assert!(json.contains("\"max_completion_tokens\":1024"));
2070        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2071        assert!(!json.contains("\"max_tokens\""));
2072    }
2073
2074    #[test]
2075    fn test_streaming_request_serialization_with_max_tokens_alias() {
2076        let messages = vec![ApiMessage {
2077            role: ApiRole::User,
2078            content: Some("Hello".to_string()),
2079            tool_calls: None,
2080            tool_call_id: None,
2081        }];
2082
2083        let request = ApiChatRequestStreaming {
2084            model: "kimi-k2-thinking",
2085            messages: &messages,
2086            max_completion_tokens: Some(1024),
2087            max_tokens: Some(1024),
2088            tools: None,
2089            reasoning: None,
2090            stream_options: None,
2091            stream: true,
2092        };
2093
2094        let json = serde_json::to_string(&request).unwrap();
2095        assert!(json.contains("\"max_completion_tokens\":1024"));
2096        assert!(json.contains("\"max_tokens\":1024"));
2097        assert!(!json.contains("\"stream_options\""));
2098    }
2099
2100    #[test]
2101    fn test_request_serialization_includes_reasoning_when_present() {
2102        let messages = vec![ApiMessage {
2103            role: ApiRole::User,
2104            content: Some("Hello".to_string()),
2105            tool_calls: None,
2106            tool_call_id: None,
2107        }];
2108
2109        let request = ApiChatRequest {
2110            model: MODEL_GPT54,
2111            messages: &messages,
2112            max_completion_tokens: Some(1024),
2113            max_tokens: None,
2114            tools: None,
2115            reasoning: Some(ApiReasoning {
2116                effort: ReasoningEffort::High,
2117            }),
2118        };
2119
2120        let json = serde_json::to_string(&request).unwrap();
2121        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
2122    }
2123}