Skip to main content

agent_sdk/providers/
openai.rs

1//! `OpenAI` API provider implementation.
2//!
3//! This module provides an implementation of `LlmProvider` for the `OpenAI`
4//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
5//! via the `with_base_url` constructor.
6//!
7//! Legacy models that require the Responses API (like `gpt-5.2-codex`) are automatically
8//! routed to the correct endpoint.
9
10use crate::llm::attachments::{request_has_attachments, validate_request_attachments};
11use crate::llm::{
12    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, LlmProvider, StopReason,
13    StreamBox, StreamDelta, ThinkingConfig, ThinkingMode, Usage,
14};
15use anyhow::Result;
16use async_trait::async_trait;
17use futures::StreamExt;
18use reqwest::StatusCode;
19use serde::de::Error as _;
20use serde::{Deserialize, Serialize};
21
22use super::openai_responses::OpenAIResponsesProvider;
23
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";

// GPT-5.4 series
pub const MODEL_GPT54: &str = "gpt-5.4";

// GPT-5.3 Codex series
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

// GPT-5.2 series
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

/// Check if a model requires the Responses API instead of Chat Completions.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
42
// GPT-5 series (400k context)
pub const MODEL_GPT5: &str = "gpt-5";
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

// o-series reasoning models
pub const MODEL_O3: &str = "o3";
pub const MODEL_O3_MINI: &str = "o3-mini";
pub const MODEL_O4_MINI: &str = "o4-mini";
pub const MODEL_O1: &str = "o1";
pub const MODEL_O1_MINI: &str = "o1-mini";

// GPT-4.1 series (improved instruction following, 1M context)
pub const MODEL_GPT41: &str = "gpt-4.1";
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

// GPT-4o series
pub const MODEL_GPT4O: &str = "gpt-4o";
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// OpenAI-compatible vendor defaults.
// These base URLs serve Chat Completions-compatible endpoints; see
// `use_max_tokens_alias` for the request-shape difference they require.
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
pub const MODEL_ZAI_GLM5: &str = "glm-5";
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
72
/// `OpenAI` LLM provider using the Chat Completions API.
///
/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
/// via the `with_base_url` constructor.
#[derive(Clone)]
pub struct OpenAIProvider {
    /// HTTP client used for all requests.
    client: reqwest::Client,
    /// Sent as `Authorization: Bearer <api_key>` on every request.
    api_key: String,
    /// Model identifier included in each request payload.
    model: String,
    /// API root; `/chat/completions` is appended when building request URLs.
    base_url: String,
    /// Provider-owned thinking/reasoning configuration, if any.
    thinking: Option<ThinkingConfig>,
}
85
86impl OpenAIProvider {
87    /// Create a new `OpenAI` provider with the specified API key and model.
88    #[must_use]
89    pub fn new(api_key: String, model: String) -> Self {
90        Self {
91            client: reqwest::Client::new(),
92            api_key,
93            model,
94            base_url: DEFAULT_BASE_URL.to_owned(),
95            thinking: None,
96        }
97    }
98
99    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
100    #[must_use]
101    pub fn with_base_url(api_key: String, model: String, base_url: String) -> Self {
102        Self {
103            client: reqwest::Client::new(),
104            api_key,
105            model,
106            base_url,
107            thinking: None,
108        }
109    }
110
111    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
112    #[must_use]
113    pub fn kimi(api_key: String, model: String) -> Self {
114        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
115    }
116
117    /// Create a provider using KIMI K2.5 (default KIMI model).
118    #[must_use]
119    pub fn kimi_k2_5(api_key: String) -> Self {
120        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
121    }
122
123    /// Create a provider using KIMI K2 Thinking.
124    #[must_use]
125    pub fn kimi_k2_thinking(api_key: String) -> Self {
126        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
127    }
128
129    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
130    #[must_use]
131    pub fn zai(api_key: String, model: String) -> Self {
132        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
133    }
134
135    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
136    #[must_use]
137    pub fn zai_glm5(api_key: String) -> Self {
138        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
139    }
140
141    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
142    #[must_use]
143    pub fn minimax(api_key: String, model: String) -> Self {
144        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
145    }
146
147    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
148    #[must_use]
149    pub fn minimax_m2_5(api_key: String) -> Self {
150        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
151    }
152
153    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
154    #[must_use]
155    pub fn gpt52_instant(api_key: String) -> Self {
156        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
157    }
158
159    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
160    #[must_use]
161    pub fn gpt54(api_key: String) -> Self {
162        Self::new(api_key, MODEL_GPT54.to_owned())
163    }
164
165    /// Create a provider using GPT-5.3 Codex (latest codex model).
166    #[must_use]
167    pub fn gpt53_codex(api_key: String) -> Self {
168        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
169    }
170
171    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
172    #[must_use]
173    pub fn gpt52_thinking(api_key: String) -> Self {
174        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
175    }
176
177    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
178    #[must_use]
179    pub fn gpt52_pro(api_key: String) -> Self {
180        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
181    }
182
183    /// Create a provider using the latest Codex model.
184    #[must_use]
185    pub fn codex(api_key: String) -> Self {
186        Self::gpt53_codex(api_key)
187    }
188
189    /// Create a provider using GPT-5 (400k context, coding and reasoning).
190    #[must_use]
191    pub fn gpt5(api_key: String) -> Self {
192        Self::new(api_key, MODEL_GPT5.to_owned())
193    }
194
195    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
196    #[must_use]
197    pub fn gpt5_mini(api_key: String) -> Self {
198        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
199    }
200
201    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
202    #[must_use]
203    pub fn gpt5_nano(api_key: String) -> Self {
204        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
205    }
206
207    /// Create a provider using o3 (most intelligent reasoning model).
208    #[must_use]
209    pub fn o3(api_key: String) -> Self {
210        Self::new(api_key, MODEL_O3.to_owned())
211    }
212
213    /// Create a provider using o3-mini (smaller o3 variant).
214    #[must_use]
215    pub fn o3_mini(api_key: String) -> Self {
216        Self::new(api_key, MODEL_O3_MINI.to_owned())
217    }
218
219    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
220    #[must_use]
221    pub fn o4_mini(api_key: String) -> Self {
222        Self::new(api_key, MODEL_O4_MINI.to_owned())
223    }
224
225    /// Create a provider using o1 (reasoning model).
226    #[must_use]
227    pub fn o1(api_key: String) -> Self {
228        Self::new(api_key, MODEL_O1.to_owned())
229    }
230
231    /// Create a provider using o1-mini (fast reasoning model).
232    #[must_use]
233    pub fn o1_mini(api_key: String) -> Self {
234        Self::new(api_key, MODEL_O1_MINI.to_owned())
235    }
236
237    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
238    #[must_use]
239    pub fn gpt41(api_key: String) -> Self {
240        Self::new(api_key, MODEL_GPT41.to_owned())
241    }
242
243    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
244    #[must_use]
245    pub fn gpt41_mini(api_key: String) -> Self {
246        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
247    }
248
249    /// Create a provider using GPT-4o.
250    #[must_use]
251    pub fn gpt4o(api_key: String) -> Self {
252        Self::new(api_key, MODEL_GPT4O.to_owned())
253    }
254
255    /// Create a provider using GPT-4o-mini (fast and cost-effective).
256    #[must_use]
257    pub fn gpt4o_mini(api_key: String) -> Self {
258        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
259    }
260
261    /// Set the provider-owned thinking configuration for this model.
262    #[must_use]
263    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
264        self.thinking = Some(thinking);
265        self
266    }
267}
268
#[async_trait]
impl LlmProvider for OpenAIProvider {
    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
        // Route to Responses API for models that require it (e.g., gpt-5.2-codex)
        // or when the request includes native image/document attachments.
        if requires_responses_api(&self.model) || request_has_attachments(&request) {
            let mut responses_provider = OpenAIResponsesProvider::with_base_url(
                self.api_key.clone(),
                self.model.clone(),
                self.base_url.clone(),
            );
            if let Some(thinking) = self.thinking.clone() {
                responses_provider = responses_provider.with_thinking(thinking);
            }
            return responses_provider.chat(request).await;
        }

        // Bad thinking config or invalid attachments are caller errors, not
        // transport failures, so they surface as `InvalidRequest` outcomes.
        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
            Ok(thinking) => thinking,
            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
        };
        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
        }
        let reasoning = build_api_reasoning(thinking_config.as_ref());
        let messages = build_api_messages(&request);
        let tools: Option<Vec<ApiTool>> = request
            .tools
            .map(|ts| ts.into_iter().map(convert_tool).collect());

        let api_request = build_api_chat_request(
            &self.model,
            &messages,
            request.max_tokens,
            tools.as_deref(),
            reasoning,
            use_max_tokens_alias(&self.base_url),
        );

        log::debug!(
            "OpenAI LLM request model={} max_tokens={}",
            self.model,
            request.max_tokens
        );

        let response = self
            .client
            .post(format!("{}/chat/completions", self.base_url))
            .header("Content-Type", "application/json")
            .header("Authorization", format!("Bearer {}", self.api_key))
            .json(&api_request)
            .send()
            .await
            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;

        let status = response.status();
        let bytes = response
            .bytes()
            .await
            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;

        log::debug!(
            "OpenAI LLM response status={} body_len={}",
            status,
            bytes.len()
        );

        // HTTP failures map to distinct `ChatOutcome` variants rather than
        // `Err`, so callers can distinguish retryable conditions.
        if status == StatusCode::TOO_MANY_REQUESTS {
            return Ok(ChatOutcome::RateLimited);
        }

        if status.is_server_error() {
            let body = String::from_utf8_lossy(&bytes);
            log::error!("OpenAI server error status={status} body={body}");
            return Ok(ChatOutcome::ServerError(body.into_owned()));
        }

        if status.is_client_error() {
            let body = String::from_utf8_lossy(&bytes);
            log::warn!("OpenAI client error status={status} body={body}");
            return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
        }

        let api_response: ApiChatResponse = serde_json::from_slice(&bytes)
            .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;

        // Only the first choice is consumed; additional choices, if any, are ignored.
        let choice = api_response
            .choices
            .into_iter()
            .next()
            .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;

        let content = build_content_blocks(&choice.message);

        let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);

        Ok(ChatOutcome::Success(ChatResponse {
            id: api_response.id,
            content,
            model: api_response.model,
            stop_reason,
            usage: Usage {
                input_tokens: api_response.usage.prompt_tokens,
                output_tokens: api_response.usage.completion_tokens,
            },
        }))
    }

    #[allow(clippy::too_many_lines)]
    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
        // Route to Responses API for models that require it (e.g., gpt-5.2-codex)
        // or when the request includes native image/document attachments.
        if requires_responses_api(&self.model) || request_has_attachments(&request) {
            // Clone what the inner stream needs so it can be `'static`-ish and
            // not borrow `self` across the Responses-API delegation.
            let api_key = self.api_key.clone();
            let model = self.model.clone();
            let base_url = self.base_url.clone();
            let thinking = self.thinking.clone();
            return Box::pin(async_stream::stream! {
                let mut responses_provider =
                    OpenAIResponsesProvider::with_base_url(api_key, model, base_url);
                if let Some(thinking) = thinking {
                    responses_provider = responses_provider.with_thinking(thinking);
                }
                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
                while let Some(item) = futures::StreamExt::next(&mut stream).await {
                    yield item;
                }
            });
        }

        Box::pin(async_stream::stream! {
            // Config/attachment problems are yielded as non-recoverable stream
            // errors instead of `Err`, mirroring `chat`'s `InvalidRequest` path.
            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
                Ok(thinking) => thinking,
                Err(error) => {
                    yield Ok(StreamDelta::Error {
                        message: error.to_string(),
                        recoverable: false,
                    });
                    return;
                }
            };
            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
                yield Ok(StreamDelta::Error {
                    message: error.to_string(),
                    recoverable: false,
                });
                return;
            }
            let reasoning = build_api_reasoning(thinking_config.as_ref());
            let messages = build_api_messages(&request);
            let tools: Option<Vec<ApiTool>> = request
                .tools
                .map(|ts| ts.into_iter().map(convert_tool).collect());

            let api_request = build_api_chat_request_streaming(
                &self.model,
                &messages,
                request.max_tokens,
                tools.as_deref(),
                reasoning,
                use_max_tokens_alias(&self.base_url),
            );

            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);

            let Ok(response) = self.client
                .post(format!("{}/chat/completions", self.base_url))
                .header("Content-Type", "application/json")
                .header("Authorization", format!("Bearer {}", self.api_key))
                .json(&api_request)
                .send()
                .await
            else {
                yield Err(anyhow::anyhow!("request failed"));
                return;
            };

            let status = response.status();

            if !status.is_success() {
                let body = response.text().await.unwrap_or_default();
                // Rate limits and server errors are retryable; 4xx are not.
                let (recoverable, level) = if status == StatusCode::TOO_MANY_REQUESTS {
                    (true, "rate_limit")
                } else if status.is_server_error() {
                    (true, "server_error")
                } else {
                    (false, "client_error")
                };
                log::warn!("OpenAI error status={status} body={body} kind={level}");
                yield Ok(StreamDelta::Error { message: body, recoverable });
                return;
            }

            // Track tool call state across deltas
            let mut tool_calls: std::collections::HashMap<usize, ToolCallAccumulator> =
                std::collections::HashMap::new();
            let mut usage: Option<Usage> = None;
            let mut buffer = String::new();
            let mut stream = response.bytes_stream();

            // SSE events arrive as arbitrary byte chunks; accumulate into
            // `buffer` and process one newline-terminated line at a time.
            while let Some(chunk_result) = stream.next().await {
                let Ok(chunk) = chunk_result else {
                    yield Err(anyhow::anyhow!("stream error: {}", chunk_result.unwrap_err()));
                    return;
                };
                buffer.push_str(&String::from_utf8_lossy(&chunk));

                while let Some(pos) = buffer.find('\n') {
                    let line = buffer[..pos].trim().to_string();
                    buffer = buffer[pos + 1..].to_string();
                    if line.is_empty() { continue; }
                    // Only `data: ` lines carry payloads; other SSE fields are skipped.
                    let Some(data) = line.strip_prefix("data: ") else { continue; };

                    for result in process_sse_data(data) {
                        match result {
                            SseProcessResult::TextDelta(c) => yield Ok(StreamDelta::TextDelta { delta: c, block_index: 0 }),
                            SseProcessResult::ToolCallUpdate { index, id, name, arguments } => apply_tool_call_update(&mut tool_calls, index, id, name, arguments),
                            SseProcessResult::Usage(u) => usage = Some(u),
                            SseProcessResult::Done(sr) => {
                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
                                return;
                            }
                            SseProcessResult::Sentinel => {
                                // [DONE] without a finish_reason: infer the stop
                                // reason from whether tool calls were accumulated.
                                let sr = if tool_calls.is_empty() { StopReason::EndTurn } else { StopReason::ToolUse };
                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
                                return;
                            }
                        }
                    }
                }
            }

            // Stream ended without [DONE] - emit what we have
            for delta in build_stream_end_deltas(&tool_calls, usage, StopReason::EndTurn) {
                yield Ok(delta);
            }
        })
    }

    fn model(&self) -> &str {
        &self.model
    }

    fn provider(&self) -> &'static str {
        "openai"
    }

    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
        self.thinking.as_ref()
    }
}
520
521/// Apply a tool call update to the accumulator.
522fn apply_tool_call_update(
523    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
524    index: usize,
525    id: Option<String>,
526    name: Option<String>,
527    arguments: Option<String>,
528) {
529    let entry = tool_calls
530        .entry(index)
531        .or_insert_with(|| ToolCallAccumulator {
532            id: String::new(),
533            name: String::new(),
534            arguments: String::new(),
535        });
536    if let Some(id) = id {
537        entry.id = id;
538    }
539    if let Some(name) = name {
540        entry.name = name;
541    }
542    if let Some(args) = arguments {
543        entry.arguments.push_str(&args);
544    }
545}
546
547/// Helper to emit tool call deltas and done event.
548fn build_stream_end_deltas(
549    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
550    usage: Option<Usage>,
551    stop_reason: StopReason,
552) -> Vec<StreamDelta> {
553    let mut deltas = Vec::new();
554
555    // Emit tool calls
556    for (idx, tool) in tool_calls {
557        deltas.push(StreamDelta::ToolUseStart {
558            id: tool.id.clone(),
559            name: tool.name.clone(),
560            block_index: *idx + 1,
561            thought_signature: None,
562        });
563        deltas.push(StreamDelta::ToolInputDelta {
564            id: tool.id.clone(),
565            delta: tool.arguments.clone(),
566            block_index: *idx + 1,
567        });
568    }
569
570    // Emit usage
571    if let Some(u) = usage {
572        deltas.push(StreamDelta::Usage(u));
573    }
574
575    // Emit done
576    deltas.push(StreamDelta::Done {
577        stop_reason: Some(stop_reason),
578    });
579
580    deltas
581}
582
/// Result of processing an SSE chunk.
///
/// `process_sse_data` returns a list of these; the streaming loop translates
/// each into `StreamDelta`s or accumulator updates.
enum SseProcessResult {
    /// Emit a text delta.
    TextDelta(String),
    /// Update tool call accumulator (index, optional id, optional name, optional args).
    ToolCallUpdate {
        index: usize,
        id: Option<String>,
        name: Option<String>,
        arguments: Option<String>,
    },
    /// Usage information.
    Usage(Usage),
    /// Stream is done with a stop reason (from `finish_reason`).
    Done(StopReason),
    /// Stream sentinel [DONE] was received (no explicit finish reason).
    Sentinel,
}
601
602/// Process an SSE data line and return results to apply.
603fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
604    if data == "[DONE]" {
605        return vec![SseProcessResult::Sentinel];
606    }
607
608    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
609        return vec![];
610    };
611
612    let mut results = Vec::new();
613
614    // Extract usage if present
615    if let Some(u) = chunk.usage {
616        results.push(SseProcessResult::Usage(Usage {
617            input_tokens: u.prompt_tokens,
618            output_tokens: u.completion_tokens,
619        }));
620    }
621
622    // Process choices
623    if let Some(choice) = chunk.choices.into_iter().next() {
624        // Handle text content delta
625        if let Some(content) = choice.delta.content
626            && !content.is_empty()
627        {
628            results.push(SseProcessResult::TextDelta(content));
629        }
630
631        // Handle tool call deltas
632        if let Some(tc_deltas) = choice.delta.tool_calls {
633            for tc in tc_deltas {
634                results.push(SseProcessResult::ToolCallUpdate {
635                    index: tc.index,
636                    id: tc.id,
637                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
638                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
639                });
640            }
641        }
642
643        // Check for finish reason
644        if let Some(finish_reason) = choice.finish_reason {
645            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
646        }
647    }
648
649    results
650}
651
/// True for OpenAI-compatible vendors whose APIs want the request limit
/// duplicated into the legacy `max_tokens` field.
fn use_max_tokens_alias(base_url: &str) -> bool {
    ["moonshot.ai", "api.z.ai", "minimax.io"]
        .iter()
        .any(|needle| base_url.contains(needle))
}
657
658fn map_finish_reason(finish_reason: &str) -> StopReason {
659    match finish_reason {
660        "stop" => StopReason::EndTurn,
661        "tool_calls" => StopReason::ToolUse,
662        "length" => StopReason::MaxTokens,
663        "content_filter" | "network_error" => StopReason::StopSequence,
664        "sensitive" => StopReason::Refusal,
665        unknown => {
666            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
667            StopReason::StopSequence
668        }
669    }
670}
671
672fn build_api_chat_request<'a>(
673    model: &'a str,
674    messages: &'a [ApiMessage],
675    max_tokens: u32,
676    tools: Option<&'a [ApiTool]>,
677    reasoning: Option<ApiReasoning>,
678    include_max_tokens_alias: bool,
679) -> ApiChatRequest<'a> {
680    ApiChatRequest {
681        model,
682        messages,
683        max_completion_tokens: Some(max_tokens),
684        max_tokens: include_max_tokens_alias.then_some(max_tokens),
685        tools,
686        reasoning,
687    }
688}
689
690fn build_api_chat_request_streaming<'a>(
691    model: &'a str,
692    messages: &'a [ApiMessage],
693    max_tokens: u32,
694    tools: Option<&'a [ApiTool]>,
695    reasoning: Option<ApiReasoning>,
696    include_max_tokens_alias: bool,
697) -> ApiChatRequestStreaming<'a> {
698    ApiChatRequestStreaming {
699        model,
700        messages,
701        max_completion_tokens: Some(max_tokens),
702        max_tokens: include_max_tokens_alias.then_some(max_tokens),
703        tools,
704        reasoning,
705        stream: true,
706    }
707}
708
709fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
710    thinking
711        .and_then(resolve_reasoning_effort)
712        .map(|effort| ApiReasoning { effort })
713}
714
715const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
716    if let Some(effort) = config.effort {
717        return Some(map_effort(effort));
718    }
719
720    match &config.mode {
721        ThinkingMode::Adaptive => None,
722        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
723    }
724}
725
/// Map the SDK `Effort` level onto the API `ReasoningEffort` scale;
/// `Max` maps to the `XHigh` tier.
const fn map_effort(effort: Effort) -> ReasoningEffort {
    match effort {
        Effort::Low => ReasoningEffort::Low,
        Effort::Medium => ReasoningEffort::Medium,
        Effort::High => ReasoningEffort::High,
        Effort::Max => ReasoningEffort::XHigh,
    }
}
734
735const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
736    if budget_tokens <= 4_096 {
737        ReasoningEffort::Low
738    } else if budget_tokens <= 16_384 {
739        ReasoningEffort::Medium
740    } else if budget_tokens <= 32_768 {
741        ReasoningEffort::High
742    } else {
743        ReasoningEffort::XHigh
744    }
745}
746
/// Convert an SDK `ChatRequest` into the flat OpenAI Chat Completions message
/// list: optional system message first, then user/assistant messages, with
/// tool results emitted as separate `role: tool` messages.
fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
    let mut messages = Vec::new();

    // Add system message first (OpenAI uses a separate message for system prompt)
    if !request.system.is_empty() {
        messages.push(ApiMessage {
            role: ApiRole::System,
            content: Some(request.system.clone()),
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Convert SDK messages to OpenAI format
    for msg in &request.messages {
        match &msg.content {
            Content::Text(text) => {
                messages.push(ApiMessage {
                    role: match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    },
                    content: Some(text.clone()),
                    tool_calls: None,
                    tool_call_id: None,
                });
            }
            Content::Blocks(blocks) => {
                // Handle mixed content blocks
                let mut text_parts = Vec::new();
                let mut tool_calls = Vec::new();

                for block in blocks {
                    match block {
                        ContentBlock::Text { text } => {
                            text_parts.push(text.clone());
                        }
                        ContentBlock::Thinking { .. }
                        | ContentBlock::RedactedThinking { .. }
                        | ContentBlock::Image { .. }
                        | ContentBlock::Document { .. } => {
                            // These blocks are not sent to the OpenAI API
                        }
                        ContentBlock::ToolUse {
                            id, name, input, ..
                        } => {
                            tool_calls.push(ApiToolCall {
                                id: id.clone(),
                                r#type: "function".to_owned(),
                                function: ApiFunctionCall {
                                    // Tool input is a JSON value; the API expects a string.
                                    name: name.clone(),
                                    arguments: serde_json::to_string(input)
                                        .unwrap_or_else(|_| "{}".to_owned()),
                                },
                            });
                        }
                        ContentBlock::ToolResult {
                            tool_use_id,
                            content,
                            ..
                        } => {
                            // Tool results are separate messages in OpenAI
                            // NOTE(review): a tool result mixed with other blocks is
                            // pushed *before* the text/tool-call message built below —
                            // assumes tool results arrive in their own SDK message;
                            // verify against callers.
                            messages.push(ApiMessage {
                                role: ApiRole::Tool,
                                content: Some(content.clone()),
                                tool_calls: None,
                                tool_call_id: Some(tool_use_id.clone()),
                            });
                        }
                    }
                }

                // Add assistant message with text and/or tool calls
                if !text_parts.is_empty() || !tool_calls.is_empty() {
                    let role = match msg.role {
                        crate::llm::Role::User => ApiRole::User,
                        crate::llm::Role::Assistant => ApiRole::Assistant,
                    };

                    // Only add if it's an assistant message or has text content
                    // (a user message containing only tool calls is dropped).
                    if role == ApiRole::Assistant || !text_parts.is_empty() {
                        messages.push(ApiMessage {
                            role,
                            content: if text_parts.is_empty() {
                                None
                            } else {
                                Some(text_parts.join("\n"))
                            },
                            tool_calls: if tool_calls.is_empty() {
                                None
                            } else {
                                Some(tool_calls)
                            },
                            tool_call_id: None,
                        });
                    }
                }
            }
        }
    }

    messages
}
850
851fn convert_tool(t: crate::llm::Tool) -> ApiTool {
852    ApiTool {
853        r#type: "function".to_owned(),
854        function: ApiFunction {
855            name: t.name,
856            description: t.description,
857            parameters: t.input_schema,
858        },
859    }
860}
861
862fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
863    let mut blocks = Vec::new();
864
865    // Add text content if present
866    if let Some(content) = &message.content
867        && !content.is_empty()
868    {
869        blocks.push(ContentBlock::Text {
870            text: content.clone(),
871        });
872    }
873
874    // Add tool calls if present
875    if let Some(tool_calls) = &message.tool_calls {
876        for tc in tool_calls {
877            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
878                .unwrap_or_else(|_| serde_json::json!({}));
879            blocks.push(ContentBlock::ToolUse {
880                id: tc.id.clone(),
881                name: tc.function.name.clone(),
882                input,
883                thought_signature: None,
884            });
885        }
886    }
887
888    blocks
889}
890
891// ============================================================================
892// API Request Types
893// ============================================================================
894
/// Request body for the Chat Completions endpoint (non-streaming variant).
///
/// Borrows the model name, messages, and tools from the caller so a request
/// can be serialized without cloning them. All optional fields are omitted
/// from the JSON entirely when `None`.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    /// Completion-token cap sent as `max_completion_tokens`.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Alternate token-cap field; selected for some vendor base URLs
    /// (see `use_max_tokens_alias`). Only one of the two caps is expected to be set.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Reasoning-effort configuration, when the request enables thinking.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
}
908
/// Request body for the Chat Completions endpoint with SSE streaming enabled.
///
/// Mirrors `ApiChatRequest` field-for-field, plus the mandatory `stream` flag.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    model: &'a str,
    messages: &'a [ApiMessage],
    /// Completion-token cap sent as `max_completion_tokens`.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Alternate token-cap field; selected for some vendor base URLs
    /// (see `use_max_tokens_alias`).
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Always set to `true` for this request type to select SSE responses.
    stream: bool,
}
923
/// Effort levels for the request's `reasoning.effort` field,
/// serialized as lowercase strings ("low", "medium", "high", "xhigh").
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    // NOTE(review): this explicit rename looks redundant with
    // `rename_all = "lowercase"` (both should yield "xhigh"); kept as-is.
    #[serde(rename = "xhigh")]
    XHigh,
}
933
/// Wrapper producing the `"reasoning": {"effort": ...}` object in requests.
#[derive(Serialize)]
struct ApiReasoning {
    effort: ReasoningEffort,
}
938
/// A single message in the Chat Completions `messages` array.
///
/// Field usage varies by role: assistant messages may carry `tool_calls`
/// (with `content` possibly absent), while tool-result messages carry
/// `content` plus the `tool_call_id` they answer.
#[derive(Serialize)]
struct ApiMessage {
    role: ApiRole,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Tool invocations emitted by an assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    /// For `role: tool` messages, the id of the tool call being answered.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}
949
/// Message roles recognized by the Chat Completions API,
/// serialized as lowercase strings ("system", "user", "assistant", "tool").
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    System,
    User,
    Assistant,
    Tool,
}
958
/// An outgoing tool call attached to an assistant message.
#[derive(Serialize)]
struct ApiToolCall {
    id: String,
    /// Serialized as `type`; always "function" in this provider.
    r#type: String,
    function: ApiFunctionCall,
}
965
/// The function name and JSON-encoded argument string inside a tool call.
#[derive(Serialize)]
struct ApiFunctionCall {
    name: String,
    /// Arguments as a JSON string, per the OpenAI wire format.
    arguments: String,
}
971
/// A tool definition in the request's `tools` array.
#[derive(Serialize)]
struct ApiTool {
    /// Serialized as `type`; always "function" in this provider.
    r#type: String,
    function: ApiFunction,
}
977
/// Function metadata for a tool definition: name, description,
/// and the JSON-schema `parameters` object passed through verbatim.
#[derive(Serialize)]
struct ApiFunction {
    name: String,
    description: String,
    parameters: serde_json::Value,
}
984
985// ============================================================================
986// API Response Types
987// ============================================================================
988
/// Top-level non-streaming Chat Completions response.
#[derive(Deserialize)]
struct ApiChatResponse {
    /// Response id (e.g. "chatcmpl-...").
    id: String,
    choices: Vec<ApiChoice>,
    /// The model that actually served the request.
    model: String,
    usage: ApiUsage,
}
996
/// One completion choice: the generated message and why generation stopped.
#[derive(Deserialize)]
struct ApiChoice {
    message: ApiResponseMessage,
    /// Raw finish-reason string; `None` tolerated, and vendor-specific
    /// values are mapped later (see `map_finish_reason`).
    finish_reason: Option<String>,
}
1002
/// The assistant message inside a response choice; either or both of
/// text content and tool calls may be present (content may be JSON `null`).
#[derive(Deserialize)]
struct ApiResponseMessage {
    content: Option<String>,
    tool_calls: Option<Vec<ApiResponseToolCall>>,
}
1008
/// A tool call returned by the model. The wire `type` field is ignored
/// since this provider only handles function tools.
#[derive(Deserialize)]
struct ApiResponseToolCall {
    id: String,
    function: ApiResponseFunctionCall,
}
1014
/// Function name and JSON-encoded argument string of a returned tool call.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    name: String,
    /// Arguments as a JSON string; parsed (best-effort) in `build_content_blocks`.
    arguments: String,
}
1020
/// Token usage reported by the API. Counts are deserialized leniently
/// because some OpenAI-compatible vendors send them as floats (e.g. `42.0`).
#[derive(Deserialize)]
struct ApiUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
}
1028
1029// ============================================================================
1030// SSE Streaming Types
1031// ============================================================================
1032
/// Accumulator for tool call state across stream deltas.
///
/// The stream sends a tool call's id/name once and its JSON argument string
/// in fragments, so fragments are concatenated here until the call completes.
struct ToolCallAccumulator {
    /// Tool-call id from the first delta carrying it.
    id: String,
    /// Function name from the first delta carrying it.
    name: String,
    /// JSON argument string assembled from successive fragments.
    arguments: String,
}
1039
/// A single chunk in `OpenAI`'s SSE stream.
#[derive(Deserialize)]
struct SseChunk {
    choices: Vec<SseChoice>,
    /// Token usage; typically only present on the final chunk, hence the default.
    #[serde(default)]
    usage: Option<SseUsage>,
}
1047
/// One streamed choice: an incremental delta plus an optional finish reason.
#[derive(Deserialize)]
struct SseChoice {
    delta: SseDelta,
    /// Set on the terminating chunk (e.g. "stop"); `None` while streaming.
    finish_reason: Option<String>,
}
1053
/// Incremental message content in a stream chunk; either field (or both)
/// may be absent, and the final chunk's delta can be empty.
#[derive(Deserialize)]
struct SseDelta {
    content: Option<String>,
    tool_calls: Option<Vec<SseToolCallDelta>>,
}
1059
/// A fragment of a streamed tool call.
#[derive(Deserialize)]
struct SseToolCallDelta {
    /// Position of the tool call within the message, used to route
    /// fragments to the matching accumulator.
    index: usize,
    /// Present only on the first fragment of a given tool call.
    id: Option<String>,
    function: Option<SseFunctionDelta>,
}
1066
/// Streamed function-call data: the name arrives once, the JSON argument
/// string arrives as partial fragments across chunks.
#[derive(Deserialize)]
struct SseFunctionDelta {
    name: Option<String>,
    arguments: Option<String>,
}
1072
/// Token usage reported on the final stream chunk; deserialized leniently
/// because some OpenAI-compatible vendors send counts as floats.
#[derive(Deserialize)]
struct SseUsage {
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
}
1080
1081fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1082where
1083    D: serde::Deserializer<'de>,
1084{
1085    #[derive(Deserialize)]
1086    #[serde(untagged)]
1087    enum NumberLike {
1088        U64(u64),
1089        F64(f64),
1090    }
1091
1092    match NumberLike::deserialize(deserializer)? {
1093        NumberLike::U64(v) => u32::try_from(v)
1094            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1095        NumberLike::F64(v) => {
1096            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1097                v.to_string().parse::<u32>().map_err(|e| {
1098                    D::Error::custom(format!(
1099                        "failed to convert integer-compatible token count {v} to u32: {e}"
1100                    ))
1101                })
1102            } else {
1103                Err(D::Error::custom(format!(
1104                    "token count must be a non-negative integer-compatible number, got {v}"
1105                )))
1106            }
1107        }
1108    }
1109}
1110
1111#[cfg(test)]
1112mod tests {
1113    use super::*;
1114
1115    // ===================
1116    // Constructor Tests
1117    // ===================
1118
1119    #[test]
1120    fn test_new_creates_provider_with_custom_model() {
1121        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1122
1123        assert_eq!(provider.model(), "custom-model");
1124        assert_eq!(provider.provider(), "openai");
1125        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1126    }
1127
1128    #[test]
1129    fn test_with_base_url_creates_provider_with_custom_url() {
1130        let provider = OpenAIProvider::with_base_url(
1131            "test-api-key".to_string(),
1132            "llama3".to_string(),
1133            "http://localhost:11434/v1".to_string(),
1134        );
1135
1136        assert_eq!(provider.model(), "llama3");
1137        assert_eq!(provider.base_url, "http://localhost:11434/v1");
1138    }
1139
1140    #[test]
1141    fn test_gpt4o_factory_creates_gpt4o_provider() {
1142        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1143
1144        assert_eq!(provider.model(), MODEL_GPT4O);
1145        assert_eq!(provider.provider(), "openai");
1146    }
1147
1148    #[test]
1149    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1150        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1151
1152        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1153        assert_eq!(provider.provider(), "openai");
1154    }
1155
1156    #[test]
1157    fn test_gpt52_thinking_factory_creates_provider() {
1158        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1159
1160        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1161        assert_eq!(provider.provider(), "openai");
1162    }
1163
1164    #[test]
1165    fn test_gpt54_factory_creates_provider() {
1166        let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1167
1168        assert_eq!(provider.model(), MODEL_GPT54);
1169        assert_eq!(provider.provider(), "openai");
1170    }
1171
1172    #[test]
1173    fn test_gpt53_codex_factory_creates_provider() {
1174        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1175
1176        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1177        assert_eq!(provider.provider(), "openai");
1178    }
1179
1180    #[test]
1181    fn test_codex_factory_points_to_latest_codex_model() {
1182        let provider = OpenAIProvider::codex("test-api-key".to_string());
1183
1184        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1185        assert_eq!(provider.provider(), "openai");
1186    }
1187
1188    #[test]
1189    fn test_gpt5_factory_creates_gpt5_provider() {
1190        let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1191
1192        assert_eq!(provider.model(), MODEL_GPT5);
1193        assert_eq!(provider.provider(), "openai");
1194    }
1195
1196    #[test]
1197    fn test_gpt5_mini_factory_creates_provider() {
1198        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1199
1200        assert_eq!(provider.model(), MODEL_GPT5_MINI);
1201        assert_eq!(provider.provider(), "openai");
1202    }
1203
1204    #[test]
1205    fn test_o3_factory_creates_o3_provider() {
1206        let provider = OpenAIProvider::o3("test-api-key".to_string());
1207
1208        assert_eq!(provider.model(), MODEL_O3);
1209        assert_eq!(provider.provider(), "openai");
1210    }
1211
1212    #[test]
1213    fn test_o4_mini_factory_creates_o4_mini_provider() {
1214        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1215
1216        assert_eq!(provider.model(), MODEL_O4_MINI);
1217        assert_eq!(provider.provider(), "openai");
1218    }
1219
1220    #[test]
1221    fn test_o1_factory_creates_o1_provider() {
1222        let provider = OpenAIProvider::o1("test-api-key".to_string());
1223
1224        assert_eq!(provider.model(), MODEL_O1);
1225        assert_eq!(provider.provider(), "openai");
1226    }
1227
1228    #[test]
1229    fn test_gpt41_factory_creates_gpt41_provider() {
1230        let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1231
1232        assert_eq!(provider.model(), MODEL_GPT41);
1233        assert_eq!(provider.provider(), "openai");
1234    }
1235
1236    #[test]
1237    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1238        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1239
1240        assert_eq!(provider.model(), "kimi-custom");
1241        assert_eq!(provider.base_url, BASE_URL_KIMI);
1242        assert_eq!(provider.provider(), "openai");
1243    }
1244
1245    #[test]
1246    fn test_kimi_k2_5_factory_creates_provider() {
1247        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1248
1249        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1250        assert_eq!(provider.base_url, BASE_URL_KIMI);
1251        assert_eq!(provider.provider(), "openai");
1252    }
1253
1254    #[test]
1255    fn test_kimi_k2_thinking_factory_creates_provider() {
1256        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1257
1258        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1259        assert_eq!(provider.base_url, BASE_URL_KIMI);
1260        assert_eq!(provider.provider(), "openai");
1261    }
1262
1263    #[test]
1264    fn test_zai_factory_creates_provider_with_zai_base_url() {
1265        let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());
1266
1267        assert_eq!(provider.model(), "glm-custom");
1268        assert_eq!(provider.base_url, BASE_URL_ZAI);
1269        assert_eq!(provider.provider(), "openai");
1270    }
1271
1272    #[test]
1273    fn test_zai_glm5_factory_creates_provider() {
1274        let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());
1275
1276        assert_eq!(provider.model(), MODEL_ZAI_GLM5);
1277        assert_eq!(provider.base_url, BASE_URL_ZAI);
1278        assert_eq!(provider.provider(), "openai");
1279    }
1280
1281    #[test]
1282    fn test_minimax_factory_creates_provider_with_minimax_base_url() {
1283        let provider =
1284            OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());
1285
1286        assert_eq!(provider.model(), "minimax-custom");
1287        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1288        assert_eq!(provider.provider(), "openai");
1289    }
1290
1291    #[test]
1292    fn test_minimax_m2_5_factory_creates_provider() {
1293        let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());
1294
1295        assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
1296        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1297        assert_eq!(provider.provider(), "openai");
1298    }
1299
1300    // ===================
1301    // Model Constants Tests
1302    // ===================
1303
1304    #[test]
1305    fn test_model_constants_have_expected_values() {
1306        // GPT-5.4 / GPT-5.3 Codex
1307        assert_eq!(MODEL_GPT54, "gpt-5.4");
1308        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
1309        // GPT-5.2 series
1310        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
1311        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
1312        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
1313        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
1314        // GPT-5 series
1315        assert_eq!(MODEL_GPT5, "gpt-5");
1316        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
1317        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
1318        // o-series
1319        assert_eq!(MODEL_O3, "o3");
1320        assert_eq!(MODEL_O3_MINI, "o3-mini");
1321        assert_eq!(MODEL_O4_MINI, "o4-mini");
1322        assert_eq!(MODEL_O1, "o1");
1323        assert_eq!(MODEL_O1_MINI, "o1-mini");
1324        // GPT-4.1 series
1325        assert_eq!(MODEL_GPT41, "gpt-4.1");
1326        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
1327        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
1328        // GPT-4o series
1329        assert_eq!(MODEL_GPT4O, "gpt-4o");
1330        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
1331        // OpenAI-compatible vendor defaults
1332        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
1333        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
1334        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
1335        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
1336        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
1337        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
1338        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
1339    }
1340
1341    // ===================
1342    // Clone Tests
1343    // ===================
1344
1345    #[test]
1346    fn test_provider_is_cloneable() {
1347        let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
1348        let cloned = provider.clone();
1349
1350        assert_eq!(provider.model(), cloned.model());
1351        assert_eq!(provider.provider(), cloned.provider());
1352        assert_eq!(provider.base_url, cloned.base_url);
1353    }
1354
1355    // ===================
1356    // API Type Serialization Tests
1357    // ===================
1358
1359    #[test]
1360    fn test_api_role_serialization() {
1361        let system_role = ApiRole::System;
1362        let user_role = ApiRole::User;
1363        let assistant_role = ApiRole::Assistant;
1364        let tool_role = ApiRole::Tool;
1365
1366        assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
1367        assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
1368        assert_eq!(
1369            serde_json::to_string(&assistant_role).unwrap(),
1370            "\"assistant\""
1371        );
1372        assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
1373    }
1374
1375    #[test]
1376    fn test_api_message_serialization_simple() {
1377        let message = ApiMessage {
1378            role: ApiRole::User,
1379            content: Some("Hello, world!".to_string()),
1380            tool_calls: None,
1381            tool_call_id: None,
1382        };
1383
1384        let json = serde_json::to_string(&message).unwrap();
1385        assert!(json.contains("\"role\":\"user\""));
1386        assert!(json.contains("\"content\":\"Hello, world!\""));
1387        // Optional fields should be omitted
1388        assert!(!json.contains("tool_calls"));
1389        assert!(!json.contains("tool_call_id"));
1390    }
1391
1392    #[test]
1393    fn test_api_message_serialization_with_tool_calls() {
1394        let message = ApiMessage {
1395            role: ApiRole::Assistant,
1396            content: Some("Let me help.".to_string()),
1397            tool_calls: Some(vec![ApiToolCall {
1398                id: "call_123".to_string(),
1399                r#type: "function".to_string(),
1400                function: ApiFunctionCall {
1401                    name: "read_file".to_string(),
1402                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
1403                },
1404            }]),
1405            tool_call_id: None,
1406        };
1407
1408        let json = serde_json::to_string(&message).unwrap();
1409        assert!(json.contains("\"role\":\"assistant\""));
1410        assert!(json.contains("\"tool_calls\""));
1411        assert!(json.contains("\"id\":\"call_123\""));
1412        assert!(json.contains("\"type\":\"function\""));
1413        assert!(json.contains("\"name\":\"read_file\""));
1414    }
1415
1416    #[test]
1417    fn test_api_tool_message_serialization() {
1418        let message = ApiMessage {
1419            role: ApiRole::Tool,
1420            content: Some("File contents here".to_string()),
1421            tool_calls: None,
1422            tool_call_id: Some("call_123".to_string()),
1423        };
1424
1425        let json = serde_json::to_string(&message).unwrap();
1426        assert!(json.contains("\"role\":\"tool\""));
1427        assert!(json.contains("\"tool_call_id\":\"call_123\""));
1428        assert!(json.contains("\"content\":\"File contents here\""));
1429    }
1430
1431    #[test]
1432    fn test_api_tool_serialization() {
1433        let tool = ApiTool {
1434            r#type: "function".to_string(),
1435            function: ApiFunction {
1436                name: "test_tool".to_string(),
1437                description: "A test tool".to_string(),
1438                parameters: serde_json::json!({
1439                    "type": "object",
1440                    "properties": {
1441                        "arg": {"type": "string"}
1442                    }
1443                }),
1444            },
1445        };
1446
1447        let json = serde_json::to_string(&tool).unwrap();
1448        assert!(json.contains("\"type\":\"function\""));
1449        assert!(json.contains("\"name\":\"test_tool\""));
1450        assert!(json.contains("\"description\":\"A test tool\""));
1451        assert!(json.contains("\"parameters\""));
1452    }
1453
1454    // ===================
1455    // API Type Deserialization Tests
1456    // ===================
1457
1458    #[test]
1459    fn test_api_response_deserialization() {
1460        let json = r#"{
1461            "id": "chatcmpl-123",
1462            "choices": [
1463                {
1464                    "message": {
1465                        "content": "Hello!"
1466                    },
1467                    "finish_reason": "stop"
1468                }
1469            ],
1470            "model": "gpt-4o",
1471            "usage": {
1472                "prompt_tokens": 100,
1473                "completion_tokens": 50
1474            }
1475        }"#;
1476
1477        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1478        assert_eq!(response.id, "chatcmpl-123");
1479        assert_eq!(response.model, "gpt-4o");
1480        assert_eq!(response.usage.prompt_tokens, 100);
1481        assert_eq!(response.usage.completion_tokens, 50);
1482        assert_eq!(response.choices.len(), 1);
1483        assert_eq!(
1484            response.choices[0].message.content,
1485            Some("Hello!".to_string())
1486        );
1487    }
1488
1489    #[test]
1490    fn test_api_response_with_tool_calls_deserialization() {
1491        let json = r#"{
1492            "id": "chatcmpl-456",
1493            "choices": [
1494                {
1495                    "message": {
1496                        "content": null,
1497                        "tool_calls": [
1498                            {
1499                                "id": "call_abc",
1500                                "type": "function",
1501                                "function": {
1502                                    "name": "read_file",
1503                                    "arguments": "{\"path\": \"test.txt\"}"
1504                                }
1505                            }
1506                        ]
1507                    },
1508                    "finish_reason": "tool_calls"
1509                }
1510            ],
1511            "model": "gpt-4o",
1512            "usage": {
1513                "prompt_tokens": 150,
1514                "completion_tokens": 30
1515            }
1516        }"#;
1517
1518        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1519        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
1520        assert_eq!(tool_calls.len(), 1);
1521        assert_eq!(tool_calls[0].id, "call_abc");
1522        assert_eq!(tool_calls[0].function.name, "read_file");
1523    }
1524
1525    #[test]
1526    fn test_api_response_with_unknown_finish_reason_deserialization() {
1527        let json = r#"{
1528            "id": "chatcmpl-789",
1529            "choices": [
1530                {
1531                    "message": {
1532                        "content": "ok"
1533                    },
1534                    "finish_reason": "vendor_custom_reason"
1535                }
1536            ],
1537            "model": "glm-5",
1538            "usage": {
1539                "prompt_tokens": 10,
1540                "completion_tokens": 5
1541            }
1542        }"#;
1543
1544        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1545        assert_eq!(
1546            response.choices[0].finish_reason.as_deref(),
1547            Some("vendor_custom_reason")
1548        );
1549        assert_eq!(
1550            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
1551            StopReason::StopSequence
1552        );
1553    }
1554
1555    #[test]
1556    fn test_map_finish_reason_covers_vendor_specific_values() {
1557        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
1558        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
1559        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
1560        assert_eq!(
1561            map_finish_reason("content_filter"),
1562            StopReason::StopSequence
1563        );
1564        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
1565        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
1566        assert_eq!(
1567            map_finish_reason("some_new_reason"),
1568            StopReason::StopSequence
1569        );
1570    }
1571
1572    // ===================
1573    // Message Conversion Tests
1574    // ===================
1575
1576    #[test]
1577    fn test_build_api_messages_with_system() {
1578        let request = ChatRequest {
1579            system: "You are helpful.".to_string(),
1580            messages: vec![crate::llm::Message::user("Hello")],
1581            tools: None,
1582            max_tokens: 1024,
1583            thinking: None,
1584        };
1585
1586        let api_messages = build_api_messages(&request);
1587        assert_eq!(api_messages.len(), 2);
1588        assert_eq!(api_messages[0].role, ApiRole::System);
1589        assert_eq!(
1590            api_messages[0].content,
1591            Some("You are helpful.".to_string())
1592        );
1593        assert_eq!(api_messages[1].role, ApiRole::User);
1594        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
1595    }
1596
1597    #[test]
1598    fn test_build_api_messages_empty_system() {
1599        let request = ChatRequest {
1600            system: String::new(),
1601            messages: vec![crate::llm::Message::user("Hello")],
1602            tools: None,
1603            max_tokens: 1024,
1604            thinking: None,
1605        };
1606
1607        let api_messages = build_api_messages(&request);
1608        assert_eq!(api_messages.len(), 1);
1609        assert_eq!(api_messages[0].role, ApiRole::User);
1610    }
1611
1612    #[test]
1613    fn test_convert_tool() {
1614        let tool = crate::llm::Tool {
1615            name: "test_tool".to_string(),
1616            description: "A test tool".to_string(),
1617            input_schema: serde_json::json!({"type": "object"}),
1618        };
1619
1620        let api_tool = convert_tool(tool);
1621        assert_eq!(api_tool.r#type, "function");
1622        assert_eq!(api_tool.function.name, "test_tool");
1623        assert_eq!(api_tool.function.description, "A test tool");
1624    }
1625
1626    #[test]
1627    fn test_build_content_blocks_text_only() {
1628        let message = ApiResponseMessage {
1629            content: Some("Hello!".to_string()),
1630            tool_calls: None,
1631        };
1632
1633        let blocks = build_content_blocks(&message);
1634        assert_eq!(blocks.len(), 1);
1635        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
1636    }
1637
1638    #[test]
1639    fn test_build_content_blocks_with_tool_calls() {
1640        let message = ApiResponseMessage {
1641            content: Some("Let me help.".to_string()),
1642            tool_calls: Some(vec![ApiResponseToolCall {
1643                id: "call_123".to_string(),
1644                function: ApiResponseFunctionCall {
1645                    name: "read_file".to_string(),
1646                    arguments: "{\"path\": \"test.txt\"}".to_string(),
1647                },
1648            }]),
1649        };
1650
1651        let blocks = build_content_blocks(&message);
1652        assert_eq!(blocks.len(), 2);
1653        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
1654        assert!(
1655            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
1656        );
1657    }
1658
1659    // ===================
1660    // SSE Streaming Type Tests
1661    // ===================
1662
1663    #[test]
1664    fn test_sse_chunk_text_delta_deserialization() {
1665        let json = r#"{
1666            "choices": [{
1667                "delta": {
1668                    "content": "Hello"
1669                },
1670                "finish_reason": null
1671            }]
1672        }"#;
1673
1674        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1675        assert_eq!(chunk.choices.len(), 1);
1676        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
1677        assert!(chunk.choices[0].finish_reason.is_none());
1678    }
1679
1680    #[test]
1681    fn test_sse_chunk_tool_call_delta_deserialization() {
1682        let json = r#"{
1683            "choices": [{
1684                "delta": {
1685                    "tool_calls": [{
1686                        "index": 0,
1687                        "id": "call_abc",
1688                        "function": {
1689                            "name": "read_file",
1690                            "arguments": ""
1691                        }
1692                    }]
1693                },
1694                "finish_reason": null
1695            }]
1696        }"#;
1697
1698        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1699        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1700        assert_eq!(tool_calls.len(), 1);
1701        assert_eq!(tool_calls[0].index, 0);
1702        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
1703        assert_eq!(
1704            tool_calls[0].function.as_ref().unwrap().name,
1705            Some("read_file".to_string())
1706        );
1707    }
1708
1709    #[test]
1710    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
1711        let json = r#"{
1712            "choices": [{
1713                "delta": {
1714                    "tool_calls": [{
1715                        "index": 0,
1716                        "function": {
1717                            "arguments": "{\"path\":"
1718                        }
1719                    }]
1720                },
1721                "finish_reason": null
1722            }]
1723        }"#;
1724
1725        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1726        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
1727        assert_eq!(tool_calls[0].id, None);
1728        assert_eq!(
1729            tool_calls[0].function.as_ref().unwrap().arguments,
1730            Some("{\"path\":".to_string())
1731        );
1732    }
1733
1734    #[test]
1735    fn test_sse_chunk_with_finish_reason_deserialization() {
1736        let json = r#"{
1737            "choices": [{
1738                "delta": {},
1739                "finish_reason": "stop"
1740            }]
1741        }"#;
1742
1743        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1744        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
1745    }
1746
1747    #[test]
1748    fn test_sse_chunk_with_usage_deserialization() {
1749        let json = r#"{
1750            "choices": [{
1751                "delta": {},
1752                "finish_reason": "stop"
1753            }],
1754            "usage": {
1755                "prompt_tokens": 100,
1756                "completion_tokens": 50
1757            }
1758        }"#;
1759
1760        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1761        let usage = chunk.usage.unwrap();
1762        assert_eq!(usage.prompt_tokens, 100);
1763        assert_eq!(usage.completion_tokens, 50);
1764    }
1765
1766    #[test]
1767    fn test_sse_chunk_with_float_usage_deserialization() {
1768        let json = r#"{
1769            "choices": [{
1770                "delta": {},
1771                "finish_reason": "stop"
1772            }],
1773            "usage": {
1774                "prompt_tokens": 100.0,
1775                "completion_tokens": 50.0
1776            }
1777        }"#;
1778
1779        let chunk: SseChunk = serde_json::from_str(json).unwrap();
1780        let usage = chunk.usage.unwrap();
1781        assert_eq!(usage.prompt_tokens, 100);
1782        assert_eq!(usage.completion_tokens, 50);
1783    }
1784
1785    #[test]
1786    fn test_api_usage_deserializes_integer_compatible_numbers() {
1787        let json = r#"{
1788            "prompt_tokens": 42.0,
1789            "completion_tokens": 7
1790        }"#;
1791
1792        let usage: ApiUsage = serde_json::from_str(json).unwrap();
1793        assert_eq!(usage.prompt_tokens, 42);
1794        assert_eq!(usage.completion_tokens, 7);
1795    }
1796
1797    #[test]
1798    fn test_api_usage_rejects_fractional_numbers() {
1799        let json = r#"{
1800            "prompt_tokens": 42.5,
1801            "completion_tokens": 7
1802        }"#;
1803
1804        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
1805        assert!(usage.is_err());
1806    }
1807
1808    #[test]
1809    fn test_use_max_tokens_alias_for_vendor_urls() {
1810        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
1811        assert!(use_max_tokens_alias(BASE_URL_KIMI));
1812        assert!(use_max_tokens_alias(BASE_URL_ZAI));
1813        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
1814    }
1815
1816    #[test]
1817    fn test_requires_responses_api_only_for_legacy_codex_model() {
1818        assert!(requires_responses_api(MODEL_GPT52_CODEX));
1819        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
1820        assert!(!requires_responses_api(MODEL_GPT54));
1821    }
1822
1823    #[test]
1824    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
1825        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
1826        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
1827    }
1828
1829    #[test]
1830    fn test_build_api_reasoning_uses_explicit_effort() {
1831        let reasoning =
1832            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
1833        assert!(matches!(reasoning.effort, ReasoningEffort::High));
1834    }
1835
1836    #[test]
1837    fn test_build_api_reasoning_omits_adaptive_without_effort() {
1838        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
1839    }
1840
1841    #[test]
1842    fn test_openai_rejects_adaptive_thinking() {
1843        let provider = OpenAIProvider::gpt54("test-key".to_string());
1844        let error = provider
1845            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
1846            .unwrap_err();
1847        assert!(
1848            error
1849                .to_string()
1850                .contains("adaptive thinking is not supported")
1851        );
1852    }
1853
1854    #[test]
1855    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
1856        let messages = vec![ApiMessage {
1857            role: ApiRole::User,
1858            content: Some("Hello".to_string()),
1859            tool_calls: None,
1860            tool_call_id: None,
1861        }];
1862
1863        let request = ApiChatRequest {
1864            model: "gpt-4o",
1865            messages: &messages,
1866            max_completion_tokens: Some(1024),
1867            max_tokens: None,
1868            tools: None,
1869            reasoning: None,
1870        };
1871
1872        let json = serde_json::to_string(&request).unwrap();
1873        assert!(json.contains("\"max_completion_tokens\":1024"));
1874        assert!(!json.contains("\"max_tokens\""));
1875    }
1876
1877    #[test]
1878    fn test_request_serialization_with_max_tokens_alias() {
1879        let messages = vec![ApiMessage {
1880            role: ApiRole::User,
1881            content: Some("Hello".to_string()),
1882            tool_calls: None,
1883            tool_call_id: None,
1884        }];
1885
1886        let request = ApiChatRequest {
1887            model: "glm-5",
1888            messages: &messages,
1889            max_completion_tokens: Some(1024),
1890            max_tokens: Some(1024),
1891            tools: None,
1892            reasoning: None,
1893        };
1894
1895        let json = serde_json::to_string(&request).unwrap();
1896        assert!(json.contains("\"max_completion_tokens\":1024"));
1897        assert!(json.contains("\"max_tokens\":1024"));
1898    }
1899
1900    #[test]
1901    fn test_streaming_request_serialization_openai_default() {
1902        let messages = vec![ApiMessage {
1903            role: ApiRole::User,
1904            content: Some("Hello".to_string()),
1905            tool_calls: None,
1906            tool_call_id: None,
1907        }];
1908
1909        let request = ApiChatRequestStreaming {
1910            model: "gpt-4o",
1911            messages: &messages,
1912            max_completion_tokens: Some(1024),
1913            max_tokens: None,
1914            tools: None,
1915            reasoning: None,
1916            stream: true,
1917        };
1918
1919        let json = serde_json::to_string(&request).unwrap();
1920        assert!(json.contains("\"stream\":true"));
1921        assert!(json.contains("\"model\":\"gpt-4o\""));
1922        assert!(json.contains("\"max_completion_tokens\":1024"));
1923        assert!(!json.contains("\"max_tokens\""));
1924    }
1925
1926    #[test]
1927    fn test_streaming_request_serialization_with_max_tokens_alias() {
1928        let messages = vec![ApiMessage {
1929            role: ApiRole::User,
1930            content: Some("Hello".to_string()),
1931            tool_calls: None,
1932            tool_call_id: None,
1933        }];
1934
1935        let request = ApiChatRequestStreaming {
1936            model: "kimi-k2-thinking",
1937            messages: &messages,
1938            max_completion_tokens: Some(1024),
1939            max_tokens: Some(1024),
1940            tools: None,
1941            reasoning: None,
1942            stream: true,
1943        };
1944
1945        let json = serde_json::to_string(&request).unwrap();
1946        assert!(json.contains("\"max_completion_tokens\":1024"));
1947        assert!(json.contains("\"max_tokens\":1024"));
1948    }
1949
1950    #[test]
1951    fn test_request_serialization_includes_reasoning_when_present() {
1952        let messages = vec![ApiMessage {
1953            role: ApiRole::User,
1954            content: Some("Hello".to_string()),
1955            tool_calls: None,
1956            tool_call_id: None,
1957        }];
1958
1959        let request = ApiChatRequest {
1960            model: MODEL_GPT54,
1961            messages: &messages,
1962            max_completion_tokens: Some(1024),
1963            max_tokens: None,
1964            tools: None,
1965            reasoning: Some(ApiReasoning {
1966                effort: ReasoningEffort::High,
1967            }),
1968        };
1969
1970        let json = serde_json::to_string(&request).unwrap();
1971        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
1972    }
1973}