Skip to main content

agent_sdk_providers/impls/
openai.rs

//! `OpenAI` API provider implementation.
//!
//! This module provides an implementation of `LlmProvider` for the `OpenAI`
//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
//! via the `with_base_url` constructor.
//!
//! Requests that need the Responses API are automatically routed to it: legacy
//! models that require it (like `gpt-5.2-codex`), requests carrying attachments,
//! and agentic (tool-using) requests sent to the official `OpenAI` endpoint.
9
10use crate::attachments::{request_has_attachments, validate_request_attachments};
11use crate::provider::LlmProvider;
12use crate::streaming::{StreamBox, StreamDelta, StreamErrorKind};
13use agent_sdk_foundation::llm::{
14    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, StopReason,
15    ThinkingConfig, ThinkingMode, Usage,
16};
17use anyhow::Result;
18use async_trait::async_trait;
19use futures::StreamExt;
20use reqwest::StatusCode;
21use serde::de::Error as _;
22use serde::{Deserialize, Serialize};
23
24use super::openai_responses::OpenAIResponsesProvider;
25
/// Base URL of the official `OpenAI` API (no trailing slash); used by
/// constructors that do not take an explicit endpoint.
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
27
/// Report whether `model` can only be served through the Responses API.
///
/// This is an exact match against [`MODEL_GPT52_CODEX`]; every other model
/// identifier yields `false`.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
32
/// Report whether `base_url` points at the official `OpenAI` API.
///
/// The decision is made on the URL's *host* rather than on a substring of the
/// whole URL, so a gateway such as `https://proxy.example/api.openai.com/v1`
/// or a look-alike domain such as `https://api.openai.com.example.net` is not
/// mistaken for the official endpoint. `api.openai.com` itself and any of its
/// subdomains (e.g. `eu.api.openai.com`) are accepted; the comparison is
/// case-insensitive and ignores scheme, credentials, port and path.
fn is_official_openai_base_url(base_url: &str) -> bool {
    const OFFICIAL_HOST: &str = "api.openai.com";

    // Drop the scheme when one is present; a bare `host/path` is accepted too.
    let without_scheme = base_url
        .split_once("://")
        .map_or(base_url, |(_, rest)| rest);

    // The authority ends at the first path, query or fragment delimiter.
    let authority = without_scheme
        .split(['/', '?', '#'])
        .next()
        .unwrap_or_default();

    // Strip optional `user:password@` credentials and a trailing `:port`.
    let host_and_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    let host = host_and_port
        .split_once(':')
        .map_or(host_and_port, |(host, _)| host)
        .trim()
        .to_ascii_lowercase();

    host == OFFICIAL_HOST
        || host
            .strip_suffix(OFFICIAL_HOST)
            .is_some_and(|prefix| prefix.ends_with('.'))
}
36
37fn request_is_agentic(request: &ChatRequest) -> bool {
38    request
39        .tools
40        .as_ref()
41        .is_some_and(|tools| !tools.is_empty()) || request.messages.iter().any(|message| {
42        matches!(
43            &message.content,
44            Content::Blocks(blocks)
45                if blocks.iter().any(|block| {
46                    matches!(block, ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. })
47                })
48        )
49    })
50}
51
52fn should_use_responses_api(base_url: &str, model: &str, request: &ChatRequest) -> bool {
53    requires_responses_api(model)
54        || request_has_attachments(request)
55        || (is_official_openai_base_url(base_url) && request_is_agentic(request))
56}
57
// GPT-5.4 series
/// Model identifier for GPT-5.4.
pub const MODEL_GPT54: &str = "gpt-5.4";

// GPT-5.3 Codex series
/// Model identifier for GPT-5.3 Codex.
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

// GPT-5.2 series
/// Model identifier for GPT-5.2 Instant.
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
/// Model identifier for GPT-5.2 Thinking.
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
/// Model identifier for GPT-5.2 Pro.
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
/// Model identifier for GPT-5.2 Codex (always served through the Responses API).
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

// GPT-5 series (400k context)
/// Model identifier for GPT-5.
pub const MODEL_GPT5: &str = "gpt-5";
/// Model identifier for GPT-5-mini.
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
/// Model identifier for GPT-5-nano.
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

// o-series reasoning models
/// Model identifier for o3.
pub const MODEL_O3: &str = "o3";
/// Model identifier for o3-mini.
pub const MODEL_O3_MINI: &str = "o3-mini";
/// Model identifier for o4-mini.
pub const MODEL_O4_MINI: &str = "o4-mini";
/// Model identifier for o1.
pub const MODEL_O1: &str = "o1";
/// Model identifier for o1-mini.
pub const MODEL_O1_MINI: &str = "o1-mini";

// GPT-4.1 series (improved instruction following, 1M context)
/// Model identifier for GPT-4.1.
pub const MODEL_GPT41: &str = "gpt-4.1";
/// Model identifier for GPT-4.1-mini.
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
/// Model identifier for GPT-4.1-nano.
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

// GPT-4o series
/// Model identifier for GPT-4o.
pub const MODEL_GPT4O: &str = "gpt-4o";
/// Model identifier for GPT-4o-mini.
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// OpenAI-compatible vendor defaults
/// Base URL of the Moonshot KIMI OpenAI-compatible API.
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
/// Base URL of the z.ai OpenAI-compatible API.
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
/// Base URL of the `MiniMax` OpenAI-compatible API.
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
/// Model identifier for KIMI K2.5.
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
/// Model identifier for KIMI K2 Thinking.
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
/// Model identifier for z.ai GLM-5.
pub const MODEL_ZAI_GLM5: &str = "glm-5";
/// Model identifier for `MiniMax` M2.5.
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
99
100/// `OpenAI` LLM provider using the Chat Completions API.
101///
102/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
103/// via the `with_base_url` constructor.
104#[derive(Clone)]
105pub struct OpenAIProvider {
106    client: reqwest::Client,
107    api_key: String,
108    model: String,
109    base_url: String,
110    thinking: Option<ThinkingConfig>,
111    /// Extra headers applied to every request (e.g. for gateway authentication).
112    extra_headers: Vec<(String, String)>,
113}
114
115impl OpenAIProvider {
116    /// The conventional environment variable holding the `OpenAI` API key.
117    pub const API_KEY_ENV: &'static str = "OPENAI_API_KEY";
118
119    /// Create a new `OpenAI` provider with the specified API key and model.
120    #[must_use]
121    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
122        Self {
123            client: reqwest::Client::new(),
124            api_key: api_key.into(),
125            model: model.into(),
126            base_url: DEFAULT_BASE_URL.to_owned(),
127            thinking: None,
128            extra_headers: Vec::new(),
129        }
130    }
131
132    /// Create a provider using GPT-5, reading the API key from the
133    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
134    ///
135    /// # Panics
136    ///
137    /// Panics if `OPENAI_API_KEY` is not set. Prefer
138    /// [`try_from_env`](Self::try_from_env) outside of examples/tests.
139    #[must_use]
140    pub fn from_env() -> Self {
141        Self::try_from_env().unwrap_or_else(|e| panic!("{e}"))
142    }
143
144    /// Create a provider using GPT-5, reading the API key from the
145    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
146    ///
147    /// # Errors
148    ///
149    /// Returns an error if `OPENAI_API_KEY` is unset or not valid UTF-8.
150    pub fn try_from_env() -> Result<Self> {
151        let api_key = std::env::var(Self::API_KEY_ENV).map_err(|_| {
152            anyhow::anyhow!("environment variable `{}` is not set", Self::API_KEY_ENV)
153        })?;
154        Ok(Self::gpt5(api_key))
155    }
156
157    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
158    #[must_use]
159    pub fn with_base_url(
160        api_key: impl Into<String>,
161        model: impl Into<String>,
162        base_url: impl Into<String>,
163    ) -> Self {
164        Self {
165            client: reqwest::Client::new(),
166            api_key: api_key.into(),
167            model: model.into(),
168            base_url: base_url.into(),
169            thinking: None,
170            extra_headers: Vec::new(),
171        }
172    }
173
174    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
175    #[must_use]
176    pub fn kimi(api_key: String, model: String) -> Self {
177        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
178    }
179
180    /// Create a provider using KIMI K2.5 (default KIMI model).
181    #[must_use]
182    pub fn kimi_k2_5(api_key: String) -> Self {
183        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
184    }
185
186    /// Create a provider using KIMI K2 Thinking.
187    #[must_use]
188    pub fn kimi_k2_thinking(api_key: String) -> Self {
189        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
190    }
191
192    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
193    #[must_use]
194    pub fn zai(api_key: String, model: String) -> Self {
195        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
196    }
197
198    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
199    #[must_use]
200    pub fn zai_glm5(api_key: String) -> Self {
201        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
202    }
203
204    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
205    #[must_use]
206    pub fn minimax(api_key: String, model: String) -> Self {
207        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
208    }
209
210    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
211    #[must_use]
212    pub fn minimax_m2_5(api_key: String) -> Self {
213        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
214    }
215
216    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
217    #[must_use]
218    pub fn gpt52_instant(api_key: String) -> Self {
219        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
220    }
221
222    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
223    #[must_use]
224    pub fn gpt54(api_key: String) -> Self {
225        Self::new(api_key, MODEL_GPT54.to_owned())
226    }
227
228    /// Create a provider using GPT-5.3 Codex (latest codex model).
229    #[must_use]
230    pub fn gpt53_codex(api_key: String) -> Self {
231        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
232    }
233
234    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
235    #[must_use]
236    pub fn gpt52_thinking(api_key: String) -> Self {
237        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
238    }
239
240    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
241    #[must_use]
242    pub fn gpt52_pro(api_key: String) -> Self {
243        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
244    }
245
246    /// Create a provider using the latest Codex model.
247    #[must_use]
248    pub fn codex(api_key: String) -> Self {
249        Self::gpt53_codex(api_key)
250    }
251
252    /// Create a provider using GPT-5 (400k context, coding and reasoning).
253    #[must_use]
254    pub fn gpt5(api_key: String) -> Self {
255        Self::new(api_key, MODEL_GPT5.to_owned())
256    }
257
258    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
259    #[must_use]
260    pub fn gpt5_mini(api_key: String) -> Self {
261        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
262    }
263
264    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
265    #[must_use]
266    pub fn gpt5_nano(api_key: String) -> Self {
267        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
268    }
269
270    /// Create a provider using o3 (most intelligent reasoning model).
271    #[must_use]
272    pub fn o3(api_key: String) -> Self {
273        Self::new(api_key, MODEL_O3.to_owned())
274    }
275
276    /// Create a provider using o3-mini (smaller o3 variant).
277    #[must_use]
278    pub fn o3_mini(api_key: String) -> Self {
279        Self::new(api_key, MODEL_O3_MINI.to_owned())
280    }
281
282    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
283    #[must_use]
284    pub fn o4_mini(api_key: String) -> Self {
285        Self::new(api_key, MODEL_O4_MINI.to_owned())
286    }
287
288    /// Create a provider using o1 (reasoning model).
289    #[must_use]
290    pub fn o1(api_key: String) -> Self {
291        Self::new(api_key, MODEL_O1.to_owned())
292    }
293
294    /// Create a provider using o1-mini (fast reasoning model).
295    #[must_use]
296    pub fn o1_mini(api_key: String) -> Self {
297        Self::new(api_key, MODEL_O1_MINI.to_owned())
298    }
299
300    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
301    #[must_use]
302    pub fn gpt41(api_key: String) -> Self {
303        Self::new(api_key, MODEL_GPT41.to_owned())
304    }
305
306    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
307    #[must_use]
308    pub fn gpt41_mini(api_key: String) -> Self {
309        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
310    }
311
312    /// Create a provider using GPT-4o.
313    #[must_use]
314    pub fn gpt4o(api_key: String) -> Self {
315        Self::new(api_key, MODEL_GPT4O.to_owned())
316    }
317
318    /// Create a provider using GPT-4o-mini (fast and cost-effective).
319    #[must_use]
320    pub fn gpt4o_mini(api_key: String) -> Self {
321        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
322    }
323
324    /// Set the provider-owned thinking configuration for this model.
325    #[must_use]
326    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
327        self.thinking = Some(thinking);
328        self
329    }
330
331    /// Add extra HTTP headers applied to every request.
332    #[must_use]
333    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
334        self.extra_headers = headers;
335        self
336    }
337
338    /// Apply auth + extra headers. Skips `Authorization` when `api_key` is
339    /// empty (BYOK gateway mode — auth handled via `extra_headers`).
340    fn apply_headers(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
341        let builder = if self.api_key.is_empty() {
342            builder
343        } else {
344            builder.header("Authorization", format!("Bearer {}", self.api_key))
345        };
346        self.extra_headers
347            .iter()
348            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
349    }
350}
351
352#[async_trait]
353impl LlmProvider for OpenAIProvider {
354    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
355        // Route official OpenAI agentic flows to the Responses API.
356        if should_use_responses_api(&self.base_url, &self.model, &request) {
357            let mut responses_provider = OpenAIResponsesProvider::with_base_url(
358                self.api_key.clone(),
359                self.model.clone(),
360                self.base_url.clone(),
361            );
362            if let Some(thinking) = self.thinking.clone() {
363                responses_provider = responses_provider.with_thinking(thinking);
364            }
365            return responses_provider.chat(request).await;
366        }
367
368        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
369            Ok(thinking) => thinking,
370            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
371        };
372        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
373            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
374        }
375        let reasoning = build_api_reasoning(thinking_config.as_ref());
376        let messages = build_api_messages(&request);
377        let tools: Option<Vec<ApiTool>> = request
378            .tools
379            .map(|ts| ts.into_iter().map(convert_tool).collect());
380        let tool_choice = request
381            .tool_choice
382            .as_ref()
383            .map(ApiToolChoice::from_tool_choice);
384        let response_format = request
385            .response_format
386            .as_ref()
387            .map(ApiResponseFormat::from_response_format);
388
389        let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
390        let api_request = ApiChatRequest {
391            model: &self.model,
392            messages: &messages,
393            max_completion_tokens: Some(request.max_tokens),
394            max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
395            tools: tools.as_deref(),
396            tool_choice,
397            reasoning,
398            response_format,
399        };
400
401        log::debug!(
402            "OpenAI LLM request model={} max_tokens={}",
403            self.model,
404            request.max_tokens
405        );
406
407        let builder = self
408            .client
409            .post(format!("{}/chat/completions", self.base_url))
410            .header("Content-Type", "application/json");
411        let response = self
412            .apply_headers(builder)
413            .json(&api_request)
414            .send()
415            .await
416            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
417
418        let status = response.status();
419        let bytes = response
420            .bytes()
421            .await
422            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
423
424        log::debug!(
425            "OpenAI LLM response status={} body_len={}",
426            status,
427            bytes.len()
428        );
429
430        decode_chat_response(status, &bytes)
431    }
432
433    #[allow(clippy::too_many_lines)]
434    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
435        // Route official OpenAI agentic flows to the Responses API.
436        if should_use_responses_api(&self.base_url, &self.model, &request) {
437            let api_key = self.api_key.clone();
438            let model = self.model.clone();
439            let base_url = self.base_url.clone();
440            let thinking = self.thinking.clone();
441            return Box::pin(async_stream::stream! {
442                let mut responses_provider =
443                    OpenAIResponsesProvider::with_base_url(api_key, model, base_url);
444                if let Some(thinking) = thinking {
445                    responses_provider = responses_provider.with_thinking(thinking);
446                }
447                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
448                while let Some(item) = futures::StreamExt::next(&mut stream).await {
449                    yield item;
450                }
451            });
452        }
453
454        Box::pin(async_stream::stream! {
455            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
456                Ok(thinking) => thinking,
457                Err(error) => {
458                    yield Ok(StreamDelta::Error {
459                        message: error.to_string(),
460                        kind: StreamErrorKind::InvalidRequest,
461                    });
462                    return;
463                }
464            };
465            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
466                yield Ok(StreamDelta::Error {
467                    message: error.to_string(),
468                    kind: StreamErrorKind::InvalidRequest,
469                });
470                return;
471            }
472            let reasoning = build_api_reasoning(thinking_config.as_ref());
473            let messages = build_api_messages(&request);
474            let tools: Option<Vec<ApiTool>> = request
475                .tools
476                .map(|ts| ts.into_iter().map(convert_tool).collect());
477            let tool_choice = request
478                .tool_choice
479                .as_ref()
480                .map(ApiToolChoice::from_tool_choice);
481            let response_format = request
482                .response_format
483                .as_ref()
484                .map(ApiResponseFormat::from_response_format);
485
486            let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
487            let include_stream_usage = use_stream_usage_options(&self.base_url);
488            let api_request = ApiChatRequestStreaming {
489                model: &self.model,
490                messages: &messages,
491                max_completion_tokens: Some(request.max_tokens),
492                max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
493                tools: tools.as_deref(),
494                tool_choice,
495                reasoning,
496                response_format,
497                stream_options: include_stream_usage.then_some(ApiStreamOptions {
498                    include_usage: true,
499                }),
500                stream: true,
501            };
502
503            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
504
505            let stream_builder = self.client
506                .post(format!("{}/chat/completions", self.base_url))
507                .header("Content-Type", "application/json");
508            let Ok(response) = self
509                .apply_headers(stream_builder)
510                .json(&api_request)
511                .send()
512                .await
513            else {
514                yield Err(anyhow::anyhow!("request failed"));
515                return;
516            };
517
518            let status = response.status();
519
520            if !status.is_success() {
521                let body = response.text().await.unwrap_or_default();
522                let (kind, level) = if status == StatusCode::TOO_MANY_REQUESTS {
523                    (StreamErrorKind::RateLimited, "rate_limit")
524                } else if status.is_server_error() {
525                    (StreamErrorKind::ServerError, "server_error")
526                } else {
527                    (StreamErrorKind::InvalidRequest, "client_error")
528                };
529                log::warn!("OpenAI error status={status} body={body} kind={level}");
530                yield Ok(StreamDelta::Error { message: body, kind });
531                return;
532            }
533
534            // Track tool call state across deltas
535            let mut tool_calls: std::collections::HashMap<usize, ToolCallAccumulator> =
536                std::collections::HashMap::new();
537            let mut usage: Option<Usage> = None;
538            let mut buffer = String::new();
539            let mut stream = response.bytes_stream();
540
541            while let Some(chunk_result) = stream.next().await {
542                let Ok(chunk) = chunk_result else {
543                    yield Err(anyhow::anyhow!("stream error: {}", chunk_result.unwrap_err()));
544                    return;
545                };
546                buffer.push_str(&String::from_utf8_lossy(&chunk));
547
548                while let Some(pos) = buffer.find('\n') {
549                    let line = buffer[..pos].trim().to_string();
550                    buffer = buffer[pos + 1..].to_string();
551                    if line.is_empty() { continue; }
552                    let Some(data) = line.strip_prefix("data: ") else { continue; };
553
554                    for result in process_sse_data(data) {
555                        match result {
556                            SseProcessResult::TextDelta(c) => yield Ok(StreamDelta::TextDelta { delta: c, block_index: 0 }),
557                            SseProcessResult::ThinkingDelta(c) => yield Ok(StreamDelta::ThinkingDelta { delta: c, block_index: 0 }),
558                            SseProcessResult::ToolCallUpdate { index, id, name, arguments } => apply_tool_call_update(&mut tool_calls, index, id, name, arguments),
559                            SseProcessResult::Usage(u) => usage = Some(u),
560                            SseProcessResult::Done(sr) => {
561                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
562                                return;
563                            }
564                            SseProcessResult::Sentinel => {
565                                let sr = if tool_calls.is_empty() { StopReason::EndTurn } else { StopReason::ToolUse };
566                                for d in build_stream_end_deltas(&tool_calls, usage.take(), sr) { yield Ok(d); }
567                                return;
568                            }
569                        }
570                    }
571                }
572            }
573
574            // Stream ended without [DONE] - emit what we have
575            for delta in build_stream_end_deltas(&tool_calls, usage, StopReason::EndTurn) {
576                yield Ok(delta);
577            }
578        })
579    }
580
581    fn model(&self) -> &str {
582        &self.model
583    }
584
585    fn provider(&self) -> &'static str {
586        "openai"
587    }
588
589    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
590        self.thinking.as_ref()
591    }
592}
593
594/// Apply a tool call update to the accumulator.
595fn apply_tool_call_update(
596    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
597    index: usize,
598    id: Option<String>,
599    name: Option<String>,
600    arguments: Option<String>,
601) {
602    let entry = tool_calls
603        .entry(index)
604        .or_insert_with(|| ToolCallAccumulator {
605            id: String::new(),
606            name: String::new(),
607            arguments: String::new(),
608        });
609    if let Some(id) = id {
610        entry.id = id;
611    }
612    if let Some(name) = name {
613        entry.name = name;
614    }
615    if let Some(args) = arguments {
616        entry.arguments.push_str(&args);
617    }
618}
619
620/// Helper to emit tool call deltas and done event.
621fn build_stream_end_deltas(
622    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
623    usage: Option<Usage>,
624    stop_reason: StopReason,
625) -> Vec<StreamDelta> {
626    let mut deltas = Vec::new();
627
628    // Emit tool calls
629    for (idx, tool) in tool_calls {
630        deltas.push(StreamDelta::ToolUseStart {
631            id: tool.id.clone(),
632            name: tool.name.clone(),
633            block_index: *idx + 1,
634            thought_signature: None,
635        });
636        deltas.push(StreamDelta::ToolInputDelta {
637            id: tool.id.clone(),
638            delta: tool.arguments.clone(),
639            block_index: *idx + 1,
640        });
641    }
642
643    // Emit usage
644    if let Some(u) = usage {
645        deltas.push(StreamDelta::Usage(u));
646    }
647
648    // Emit done
649    deltas.push(StreamDelta::Done {
650        stop_reason: Some(stop_reason),
651    });
652
653    deltas
654}
655
656/// Result of processing an SSE chunk.
657enum SseProcessResult {
658    /// Emit a text delta.
659    TextDelta(String),
660    /// Emit a thinking/reasoning delta (reasoning-model fallback when the model
661    /// streams its output via `reasoning_content`/`reasoning` and `content` is
662    /// empty, mirroring the non-streaming `build_content_blocks` fallback).
663    ThinkingDelta(String),
664    /// Update tool call accumulator (index, optional id, optional name, optional args).
665    ToolCallUpdate {
666        index: usize,
667        id: Option<String>,
668        name: Option<String>,
669        arguments: Option<String>,
670    },
671    /// Usage information.
672    Usage(Usage),
673    /// Stream is done with a stop reason.
674    Done(StopReason),
675    /// Stream sentinel [DONE] was received.
676    Sentinel,
677}
678
679/// Process an SSE data line and return results to apply.
680fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
681    if data == "[DONE]" {
682        return vec![SseProcessResult::Sentinel];
683    }
684
685    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
686        return vec![];
687    };
688
689    let mut results = Vec::new();
690
691    // Extract usage if present
692    if let Some(u) = chunk.usage {
693        results.push(SseProcessResult::Usage(Usage {
694            input_tokens: u.prompt_tokens,
695            output_tokens: u.completion_tokens,
696            cached_input_tokens: u
697                .prompt_tokens_details
698                .as_ref()
699                .map_or(0, |details| details.cached_tokens),
700            cache_creation_input_tokens: 0,
701        }));
702    }
703
704    // Process choices
705    if let Some(choice) = chunk.choices.into_iter().next() {
706        // Handle text content delta. When `content` is empty/absent but the
707        // model streamed reasoning tokens (DeepSeek-style answer-in-
708        // `reasoning_content`, or `OpenRouter`-normalized `reasoning`), surface
709        // the reasoning as a thinking delta so the usable output is not silently
710        // dropped under streaming. This mirrors the non-streaming
711        // `build_content_blocks` fallback: text content takes precedence and the
712        // reasoning fallback only fires when `content` is empty.
713        if let Some(content) = choice.delta.content
714            && !content.is_empty()
715        {
716            results.push(SseProcessResult::TextDelta(content));
717        } else if let Some(reasoning) = choice
718            .delta
719            .reasoning_content
720            .as_deref()
721            .or(choice.delta.reasoning.as_deref())
722            .filter(|r| !r.is_empty())
723        {
724            results.push(SseProcessResult::ThinkingDelta(reasoning.to_owned()));
725        }
726
727        // Handle tool call deltas
728        if let Some(tc_deltas) = choice.delta.tool_calls {
729            for tc in tc_deltas {
730                results.push(SseProcessResult::ToolCallUpdate {
731                    index: tc.index,
732                    id: tc.id,
733                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
734                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
735                });
736            }
737        }
738
739        // Check for finish reason
740        if let Some(finish_reason) = choice.finish_reason {
741            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
742        }
743    }
744
745    results
746}
747
/// Report whether requests to `base_url` should also carry the `max_tokens`
/// field next to `max_completion_tokens`.
///
/// True when the URL contains one of the listed vendor host fragments.
fn use_max_tokens_alias(base_url: &str) -> bool {
    const ALIAS_VENDOR_HOSTS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];

    ALIAS_VENDOR_HOSTS
        .iter()
        .any(|host| base_url.contains(*host))
}
753
754fn use_stream_usage_options(base_url: &str) -> bool {
755    base_url == DEFAULT_BASE_URL || base_url.contains("api.openai.com")
756}
757
758/// Map an HTTP status + body into a [`ChatOutcome`], parsing the success body
759/// into a [`ChatResponse`].
760fn decode_chat_response(status: StatusCode, bytes: &[u8]) -> Result<ChatOutcome> {
761    if status == StatusCode::TOO_MANY_REQUESTS {
762        return Ok(ChatOutcome::RateLimited);
763    }
764
765    if status.is_server_error() {
766        let body = String::from_utf8_lossy(bytes);
767        log::error!("OpenAI server error status={status} body={body}");
768        return Ok(ChatOutcome::ServerError(body.into_owned()));
769    }
770
771    if status.is_client_error() {
772        let body = String::from_utf8_lossy(bytes);
773        log::warn!("OpenAI client error status={status} body={body}");
774        return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
775    }
776
777    let api_response: ApiChatResponse = serde_json::from_slice(bytes)
778        .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
779
780    let choice = api_response
781        .choices
782        .into_iter()
783        .next()
784        .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
785
786    let content = build_content_blocks(&choice.message);
787    let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
788
789    Ok(ChatOutcome::Success(ChatResponse {
790        id: api_response.id,
791        content,
792        model: api_response.model,
793        stop_reason,
794        usage: Usage {
795            input_tokens: api_response.usage.prompt_tokens,
796            output_tokens: api_response.usage.completion_tokens,
797            cached_input_tokens: api_response
798                .usage
799                .prompt_tokens_details
800                .as_ref()
801                .map_or(0, |details| details.cached_tokens),
802            cache_creation_input_tokens: 0,
803        },
804    }))
805}
806
807fn map_finish_reason(finish_reason: &str) -> StopReason {
808    match finish_reason {
809        "stop" => StopReason::EndTurn,
810        "tool_calls" => StopReason::ToolUse,
811        "length" => StopReason::MaxTokens,
812        "content_filter" | "network_error" => StopReason::StopSequence,
813        "sensitive" => StopReason::Refusal,
814        unknown => {
815            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
816            StopReason::StopSequence
817        }
818    }
819}
820
821fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
822    thinking
823        .and_then(resolve_reasoning_effort)
824        .map(|effort| ApiReasoning { effort })
825}
826
827const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
828    if let Some(effort) = config.effort {
829        return Some(map_effort(effort));
830    }
831
832    match &config.mode {
833        ThinkingMode::Adaptive => None,
834        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
835    }
836}
837
838const fn map_effort(effort: Effort) -> ReasoningEffort {
839    match effort {
840        Effort::Low => ReasoningEffort::Low,
841        Effort::Medium => ReasoningEffort::Medium,
842        Effort::High => ReasoningEffort::High,
843        Effort::Max => ReasoningEffort::XHigh,
844    }
845}
846
847const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
848    if budget_tokens <= 4_096 {
849        ReasoningEffort::Low
850    } else if budget_tokens <= 16_384 {
851        ReasoningEffort::Medium
852    } else if budget_tokens <= 32_768 {
853        ReasoningEffort::High
854    } else {
855        ReasoningEffort::XHigh
856    }
857}
858
859const fn api_role(role: agent_sdk_foundation::llm::Role) -> ApiRole {
860    match role {
861        agent_sdk_foundation::llm::Role::User => ApiRole::User,
862        agent_sdk_foundation::llm::Role::Assistant => ApiRole::Assistant,
863    }
864}
865
866/// Convert a `Content::Blocks` message into the `OpenAI` wire messages it maps
867/// to, appending them to `messages`.
868///
869/// Tool results become standalone `tool` messages; text, tool calls and (on
870/// assistant tool-call turns) echoed-back reasoning collapse into a single
871/// message.
872fn append_block_messages(
873    messages: &mut Vec<ApiMessage>,
874    role: agent_sdk_foundation::llm::Role,
875    blocks: &[ContentBlock],
876) {
877    let mut text_parts = Vec::new();
878    let mut thinking_parts = Vec::new();
879    let mut tool_calls = Vec::new();
880
881    for block in blocks {
882        match block {
883            ContentBlock::Text { text } => text_parts.push(text.clone()),
884            ContentBlock::Thinking { thinking, .. } => {
885                // DeepSeek-style thinking-mode multi-turn requires the prior
886                // assistant reasoning_content to be echoed back on a tool-call
887                // turn or the API 400s. Collected here; only carried into
888                // reasoning_content below when this turn also has a tool call.
889                thinking_parts.push(thinking.clone());
890            }
891            ContentBlock::RedactedThinking { .. }
892            | ContentBlock::Image { .. }
893            | ContentBlock::Document { .. } => {
894                // These blocks are not sent to the OpenAI API
895            }
896            ContentBlock::ToolUse {
897                id, name, input, ..
898            } => {
899                tool_calls.push(ApiToolCall {
900                    id: id.clone(),
901                    r#type: "function".to_owned(),
902                    function: ApiFunctionCall {
903                        name: name.clone(),
904                        arguments: serde_json::to_string(input).unwrap_or_else(|_| "{}".to_owned()),
905                    },
906                });
907            }
908            ContentBlock::ToolResult {
909                tool_use_id,
910                content,
911                ..
912            } => {
913                // Tool results are separate messages in OpenAI
914                messages.push(ApiMessage {
915                    role: ApiRole::Tool,
916                    content: Some(content.clone()),
917                    reasoning_content: None,
918                    tool_calls: None,
919                    tool_call_id: Some(tool_use_id.clone()),
920                });
921            }
922            // `ContentBlock` is `#[non_exhaustive]`; a block kind this SDK
923            // version cannot represent is not sent to OpenAI.
924            _ => log::warn!("Skipping unrecognized OpenAI content block"),
925        }
926    }
927
928    let role = api_role(role);
929
930    // reasoning_content is only echoed back on an assistant turn that ALSO
931    // carries a tool call — the one case DeepSeek's thinking-mode protocol
932    // requires it. Per that protocol legacy `deepseek-reasoner` 400s if
933    // reasoning_content appears in input at all, and DeepSeek V4 thinking-mode
934    // only needs it on tool-call turns. So a plain reasoning-only assistant
935    // turn (no tool call) does NOT carry reasoning_content, and it is never
936    // attached to user messages.
937    let reasoning_content =
938        if role == ApiRole::Assistant && !thinking_parts.is_empty() && !tool_calls.is_empty() {
939            Some(thinking_parts.join("\n"))
940        } else {
941            None
942        };
943
944    // Add the message when it carries text, tool calls, or (for an assistant
945    // turn) reasoning to echo back. Only emit if it's an assistant message or
946    // has text content.
947    let has_payload =
948        !text_parts.is_empty() || !tool_calls.is_empty() || reasoning_content.is_some();
949    if has_payload && (role == ApiRole::Assistant || !text_parts.is_empty()) {
950        messages.push(ApiMessage {
951            role,
952            content: if text_parts.is_empty() {
953                None
954            } else {
955                Some(text_parts.join("\n"))
956            },
957            reasoning_content,
958            tool_calls: if tool_calls.is_empty() {
959                None
960            } else {
961                Some(tool_calls)
962            },
963            tool_call_id: None,
964        });
965    }
966}
967
968fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
969    let mut messages = Vec::new();
970
971    // Add system message first (OpenAI uses a separate message for system prompt)
972    if !request.system.is_empty() {
973        messages.push(ApiMessage {
974            role: ApiRole::System,
975            content: Some(request.system.clone()),
976            reasoning_content: None,
977            tool_calls: None,
978            tool_call_id: None,
979        });
980    }
981
982    // Convert SDK messages to OpenAI format
983    for msg in &request.messages {
984        match &msg.content {
985            Content::Text(text) => {
986                messages.push(ApiMessage {
987                    role: api_role(msg.role),
988                    content: Some(text.clone()),
989                    reasoning_content: None,
990                    tool_calls: None,
991                    tool_call_id: None,
992                });
993            }
994            Content::Blocks(blocks) => append_block_messages(&mut messages, msg.role, blocks),
995        }
996    }
997
998    messages
999}
1000
1001fn convert_tool(t: agent_sdk_foundation::llm::Tool) -> ApiTool {
1002    ApiTool {
1003        r#type: "function".to_owned(),
1004        function: ApiFunction {
1005            name: t.name,
1006            description: t.description,
1007            parameters: t.input_schema,
1008        },
1009    }
1010}
1011
1012/// Non-empty reasoning text from an `OpenAI`-compatible response message, if any.
1013///
1014/// Prefers `DeepSeek`-style `reasoning_content`, falling back to the `reasoning`
1015/// field used by some `OpenRouter` upstreams.
1016fn reasoning_text(message: &ApiResponseMessage) -> Option<&str> {
1017    message
1018        .reasoning_content
1019        .as_deref()
1020        .or(message.reasoning.as_deref())
1021        .filter(|r| !r.is_empty())
1022}
1023
1024fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
1025    let mut blocks = Vec::new();
1026
1027    // Add text content if present
1028    if let Some(content) = &message.content
1029        && !content.is_empty()
1030    {
1031        blocks.push(ContentBlock::Text {
1032            text: content.clone(),
1033        });
1034    } else if let Some(reasoning) = reasoning_text(message) {
1035        // Reasoning-model fallback: when `content` is empty/absent but the model
1036        // produced reasoning tokens (DeepSeek-style answer-in-`reasoning_content`,
1037        // or any reasoning model truncated under a tight `max_tokens` before it
1038        // emitted visible content), surface the reasoning as a Thinking block so
1039        // the usable output is not silently dropped. This is a fallback only —
1040        // when `content` is present the reasoning is left untouched.
1041        blocks.push(ContentBlock::Thinking {
1042            thinking: reasoning.to_owned(),
1043            signature: None,
1044        });
1045    }
1046
1047    // Add tool calls if present
1048    if let Some(tool_calls) = &message.tool_calls {
1049        for tc in tool_calls {
1050            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
1051                .unwrap_or_else(|_| serde_json::json!({}));
1052            blocks.push(ContentBlock::ToolUse {
1053                id: tc.id.clone(),
1054                name: tc.function.name.clone(),
1055                input,
1056                thought_signature: None,
1057            });
1058        }
1059    }
1060
1061    blocks
1062}
1063
1064// ============================================================================
1065// API Request Types
1066// ============================================================================
1067
/// Serializable chat request payload without any streaming fields.
///
/// Every `Option` field is left out of the serialized output when it is `None`.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    /// Model identifier, borrowed for the lifetime of the request.
    model: &'a str,
    /// Conversation messages, borrowed for the lifetime of the request.
    messages: &'a [ApiMessage],
    // NOTE(review): two token-limit fields exist side by side; presumably only
    // one is populated per request depending on the target API — confirm
    // against the code that builds this struct.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    /// Tool definitions offered to the model, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Tool selection policy, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<ApiToolChoice>,
    /// Reasoning effort settings, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Structured-output format, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<ApiResponseFormat>,
}
1085
/// Serializable chat request payload that additionally carries the `stream`
/// flag and optional `stream_options`.
///
/// Every `Option` field is left out of the serialized output when it is `None`.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    /// Model identifier, borrowed for the lifetime of the request.
    model: &'a str,
    /// Conversation messages, borrowed for the lifetime of the request.
    messages: &'a [ApiMessage],
    // NOTE(review): two token-limit fields exist side by side; presumably only
    // one is populated per request depending on the target API — confirm
    // against the code that builds this struct.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    /// Tool definitions offered to the model, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Tool selection policy, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<ApiToolChoice>,
    /// Reasoning effort settings, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Structured-output format, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<ApiResponseFormat>,
    /// Extra streaming options (see `ApiStreamOptions`), if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<ApiStreamOptions>,
    /// Always serialized; unlike the optional fields it has no skip rule.
    stream: bool,
}
1106
/// `OpenAI` `tool_choice` wire format.
///
/// - `"auto"` — model decides.
/// - `{"type": "function", "function": {"name": "<name>"}}` — force a specific function.
///
/// The enum is `untagged`, so each variant serializes as its bare payload.
#[derive(Serialize)]
#[serde(untagged)]
enum ApiToolChoice {
    /// Serialized as a plain JSON string.
    String(String),
    /// Serialized as an object with `type` and `function` keys.
    Named {
        /// Emitted under the key `type`.
        #[serde(rename = "type")]
        choice_type: String,
        /// The function the model is forced to call.
        function: ApiToolChoiceFunction,
    },
}
1121
/// The `function` object nested inside a named `tool_choice`.
#[derive(Serialize)]
struct ApiToolChoiceFunction {
    /// Name of the function to force.
    name: String,
}
1126
1127impl ApiToolChoice {
1128    fn from_tool_choice(tc: &agent_sdk_foundation::llm::ToolChoice) -> Self {
1129        match tc {
1130            agent_sdk_foundation::llm::ToolChoice::Auto => Self::String("auto".to_owned()),
1131            agent_sdk_foundation::llm::ToolChoice::Tool(name) => Self::Named {
1132                choice_type: "function".to_owned(),
1133                function: ApiToolChoiceFunction { name: name.clone() },
1134            },
1135        }
1136    }
1137}
1138
/// `OpenAI` `response_format` wire format for structured outputs.
///
/// Emits `{"type": "json_schema", "json_schema": {"name", "schema", "strict"}}`.
#[derive(Serialize)]
struct ApiResponseFormat {
    /// Emitted under the key `type`.
    #[serde(rename = "type")]
    format_type: &'static str,
    /// The named schema the response must follow.
    json_schema: ApiJsonSchema,
}
1148
/// The `json_schema` object nested inside `ApiResponseFormat`.
#[derive(Serialize)]
struct ApiJsonSchema {
    /// Schema name.
    name: String,
    /// The JSON schema document itself.
    schema: serde_json::Value,
    /// Strictness flag, forwarded unchanged from the SDK response format.
    strict: bool,
}
1155
1156impl ApiResponseFormat {
1157    fn from_response_format(rf: &agent_sdk_foundation::llm::ResponseFormat) -> Self {
1158        Self {
1159            format_type: "json_schema",
1160            json_schema: ApiJsonSchema {
1161                name: rf.name.clone(),
1162                schema: rf.schema.clone(),
1163                strict: rf.strict,
1164            },
1165        }
1166    }
1167}
1168
/// Serialized as the `stream_options` object of a streaming request.
#[derive(Clone, Copy, Serialize)]
struct ApiStreamOptions {
    // NOTE(review): presumably asks the API to include token usage in the
    // stream (see `SseChunk::usage`) — confirm against the API reference.
    include_usage: bool,
}
1173
/// Reasoning effort levels as sent on the wire.
///
/// Serialized as the lowercase strings `low`, `medium`, `high` and `xhigh`.
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    /// Explicitly renamed so the wire value is `xhigh`.
    #[serde(rename = "xhigh")]
    XHigh,
}
1183
/// The `reasoning` object of a chat request.
#[derive(Serialize)]
struct ApiReasoning {
    /// Requested reasoning effort.
    effort: ReasoningEffort,
}
1188
/// One message of the outgoing conversation.
///
/// All optional fields are omitted from the serialized output when `None`.
#[derive(Serialize)]
struct ApiMessage {
    /// Who the message is attributed to.
    role: ApiRole,
    /// Text content, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// `DeepSeek`-style thinking-mode multi-turn requires the prior assistant
    /// `reasoning_content` to be echoed back on a tool-call turn or the API
    /// rejects it (HTTP 400). Carried back only for assistant turns that had a
    /// Thinking block AND a tool call; omitted entirely otherwise (including
    /// reasoning-only turns, since legacy `deepseek-reasoner` 400s if
    /// `reasoning_content` appears in input) so the normal path is unchanged.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_content: Option<String>,
    /// Tool calls requested in this message, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    /// Id of the tool call a `tool` message responds to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}
1207
/// Message roles on the wire.
///
/// Serialized as the lowercase strings `system`, `user`, `assistant` and `tool`.
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    System,
    User,
    Assistant,
    Tool,
}
1216
/// A tool call attached to an outgoing message.
#[derive(Serialize)]
struct ApiToolCall {
    /// Identifier of the call; tool results refer back to it via `tool_call_id`.
    id: String,
    /// Serialized under the key `type`.
    r#type: String,
    /// The function name and arguments of the call.
    function: ApiFunctionCall,
}
1223
/// The function part of an outgoing tool call.
#[derive(Serialize)]
struct ApiFunctionCall {
    /// Name of the function being called.
    name: String,
    /// Arguments as a JSON-encoded string, not as a nested JSON value.
    arguments: String,
}
1229
/// A tool definition offered to the model.
#[derive(Serialize)]
struct ApiTool {
    /// Serialized under the key `type`.
    r#type: String,
    /// The function's name, description and parameter schema.
    function: ApiFunction,
}
1235
/// The function description nested inside an `ApiTool`.
#[derive(Serialize)]
struct ApiFunction {
    /// Function name.
    name: String,
    /// Description of the function.
    description: String,
    /// Parameter schema as a JSON value.
    parameters: serde_json::Value,
}
1242
1243// ============================================================================
1244// API Response Types
1245// ============================================================================
1246
/// Top-level shape of a chat response body.
///
/// None of the fields has a serde default, so all of them must be present
/// for deserialization to succeed.
#[derive(Deserialize)]
struct ApiChatResponse {
    /// Response identifier.
    id: String,
    /// Returned choices.
    choices: Vec<ApiChoice>,
    /// Model name reported by the API.
    model: String,
    /// Token accounting for the request.
    usage: ApiUsage,
}
1254
/// One entry of a chat response's `choices` array.
#[derive(Deserialize)]
struct ApiChoice {
    /// The generated message.
    message: ApiResponseMessage,
    /// Raw finish reason string, if the API supplied one.
    finish_reason: Option<String>,
}
1260
/// The message object of a response choice.
#[derive(Deserialize)]
struct ApiResponseMessage {
    /// Visible text content, if any.
    content: Option<String>,
    /// Tool calls requested by the model, if any.
    tool_calls: Option<Vec<ApiResponseToolCall>>,
    /// `DeepSeek`-style chain-of-thought, returned at the same level as
    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
    #[serde(default)]
    reasoning_content: Option<String>,
    /// `OpenRouter` normalizes reasoning under a `reasoning` field for some
    /// upstreams; treated as an equivalent fallback to `reasoning_content`.
    #[serde(default)]
    reasoning: Option<String>,
}
1274
/// A tool call found in a response message.
#[derive(Deserialize)]
struct ApiResponseToolCall {
    /// Identifier of the call.
    id: String,
    /// The function name and arguments of the call.
    function: ApiResponseFunctionCall,
}
1280
/// The function part of a response tool call.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    /// Name of the function the model wants to call.
    name: String,
    /// Arguments as a string; `build_content_blocks` parses it as JSON.
    arguments: String,
}
1286
/// Token usage reported in a chat response.
///
/// Both counts go through `deserialize_u32_from_number`, which accepts
/// integers as well as integer-valued floats.
#[derive(Deserialize)]
struct ApiUsage {
    /// Number of prompt tokens.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    /// Number of completion tokens.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    /// Optional breakdown of the prompt tokens; `None` when absent.
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1296
/// Breakdown of prompt tokens nested inside a usage object.
#[derive(Deserialize)]
struct ApiPromptTokensDetails {
    /// Cached prompt tokens; defaults to 0 when the key is absent.
    #[serde(default, deserialize_with = "deserialize_u32_from_number")]
    cached_tokens: u32,
}
1302
1303// ============================================================================
1304// SSE Streaming Types
1305// ============================================================================
1306
/// Accumulator for tool call state across stream deltas.
struct ToolCallAccumulator {
    /// Identifier of the tool call.
    id: String,
    /// Name of the function being called.
    name: String,
    // NOTE(review): presumably the concatenation of the `arguments` fragments
    // received so far — confirm in the stream handler.
    arguments: String,
}
1313
/// A single chunk in `OpenAI`'s SSE stream.
#[derive(Deserialize)]
struct SseChunk {
    /// Choice deltas carried by this chunk.
    choices: Vec<SseChoice>,
    /// Token usage, when the chunk carries it; `None` when absent.
    #[serde(default)]
    usage: Option<SseUsage>,
}
1321
/// One entry of a stream chunk's `choices` array.
#[derive(Deserialize)]
struct SseChoice {
    /// Incremental content for this choice.
    delta: SseDelta,
    /// Raw finish reason string, if this chunk carries one.
    finish_reason: Option<String>,
}
1327
/// Incremental payload of a streamed choice.
#[derive(Deserialize)]
struct SseDelta {
    /// Text fragment, if any.
    content: Option<String>,
    /// Tool call fragments, if any.
    tool_calls: Option<Vec<SseToolCallDelta>>,
    /// `DeepSeek`-style streamed chain-of-thought, returned at the same level as
    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
    #[serde(default)]
    reasoning_content: Option<String>,
    /// `OpenRouter` normalizes streamed reasoning under a `reasoning` field for
    /// some upstreams; treated as an equivalent fallback to `reasoning_content`.
    #[serde(default)]
    reasoning: Option<String>,
}
1341
/// A fragment of a tool call inside a streamed delta.
#[derive(Deserialize)]
struct SseToolCallDelta {
    // NOTE(review): presumably the position of the tool call this fragment
    // belongs to, used to match fragments across chunks — confirm in the
    // stream handler.
    index: usize,
    /// Tool call id, when this fragment carries it.
    id: Option<String>,
    /// Function name/arguments fragment, when present.
    function: Option<SseFunctionDelta>,
}
1348
/// The function part of a streamed tool call fragment.
#[derive(Deserialize)]
struct SseFunctionDelta {
    /// Function name, when this fragment carries it.
    name: Option<String>,
    /// Piece of the arguments string, when this fragment carries one.
    arguments: Option<String>,
}
1354
/// Token usage reported inside a stream chunk.
///
/// Both counts go through `deserialize_u32_from_number`, which accepts
/// integers as well as integer-valued floats.
#[derive(Deserialize)]
struct SseUsage {
    /// Number of prompt tokens.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    /// Number of completion tokens.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    /// Optional breakdown of the prompt tokens; `None` when absent.
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1364
1365fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1366where
1367    D: serde::Deserializer<'de>,
1368{
1369    #[derive(Deserialize)]
1370    #[serde(untagged)]
1371    enum NumberLike {
1372        U64(u64),
1373        F64(f64),
1374    }
1375
1376    match NumberLike::deserialize(deserializer)? {
1377        NumberLike::U64(v) => u32::try_from(v)
1378            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1379        NumberLike::F64(v) => {
1380            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1381                v.to_string().parse::<u32>().map_err(|e| {
1382                    D::Error::custom(format!(
1383                        "failed to convert integer-compatible token count {v} to u32: {e}"
1384                    ))
1385                })
1386            } else {
1387                Err(D::Error::custom(format!(
1388                    "token count must be a non-negative integer-compatible number, got {v}"
1389                )))
1390            }
1391        }
1392    }
1393}
1394
1395#[cfg(test)]
1396mod tests {
1397    use super::*;
1398    use anyhow::Context as _;
1399
1400    // ===================
1401    // Constructor Tests
1402    // ===================
1403
1404    #[test]
1405    fn test_new_creates_provider_with_custom_model() {
1406        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1407
1408        assert_eq!(provider.model(), "custom-model");
1409        assert_eq!(provider.provider(), "openai");
1410        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1411    }
1412
1413    #[test]
1414    fn test_with_base_url_creates_provider_with_custom_url() {
1415        let provider = OpenAIProvider::with_base_url(
1416            "test-api-key".to_string(),
1417            "llama3".to_string(),
1418            "http://localhost:11434/v1".to_string(),
1419        );
1420
1421        assert_eq!(provider.model(), "llama3");
1422        assert_eq!(provider.base_url, "http://localhost:11434/v1");
1423    }
1424
1425    #[test]
1426    fn test_gpt4o_factory_creates_gpt4o_provider() {
1427        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1428
1429        assert_eq!(provider.model(), MODEL_GPT4O);
1430        assert_eq!(provider.provider(), "openai");
1431    }
1432
1433    #[test]
1434    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1435        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1436
1437        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1438        assert_eq!(provider.provider(), "openai");
1439    }
1440
1441    #[test]
1442    fn test_gpt52_thinking_factory_creates_provider() {
1443        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1444
1445        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1446        assert_eq!(provider.provider(), "openai");
1447    }
1448
1449    #[test]
1450    fn test_gpt54_factory_creates_provider() {
1451        let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1452
1453        assert_eq!(provider.model(), MODEL_GPT54);
1454        assert_eq!(provider.provider(), "openai");
1455    }
1456
1457    #[test]
1458    fn test_gpt53_codex_factory_creates_provider() {
1459        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1460
1461        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1462        assert_eq!(provider.provider(), "openai");
1463    }
1464
1465    #[test]
1466    fn test_codex_factory_points_to_latest_codex_model() {
1467        let provider = OpenAIProvider::codex("test-api-key".to_string());
1468
1469        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1470        assert_eq!(provider.provider(), "openai");
1471    }
1472
1473    #[test]
1474    fn test_gpt5_factory_creates_gpt5_provider() {
1475        let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1476
1477        assert_eq!(provider.model(), MODEL_GPT5);
1478        assert_eq!(provider.provider(), "openai");
1479    }
1480
1481    #[test]
1482    fn test_gpt5_mini_factory_creates_provider() {
1483        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1484
1485        assert_eq!(provider.model(), MODEL_GPT5_MINI);
1486        assert_eq!(provider.provider(), "openai");
1487    }
1488
1489    #[test]
1490    fn test_o3_factory_creates_o3_provider() {
1491        let provider = OpenAIProvider::o3("test-api-key".to_string());
1492
1493        assert_eq!(provider.model(), MODEL_O3);
1494        assert_eq!(provider.provider(), "openai");
1495    }
1496
1497    #[test]
1498    fn test_o4_mini_factory_creates_o4_mini_provider() {
1499        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1500
1501        assert_eq!(provider.model(), MODEL_O4_MINI);
1502        assert_eq!(provider.provider(), "openai");
1503    }
1504
1505    #[test]
1506    fn test_o1_factory_creates_o1_provider() {
1507        let provider = OpenAIProvider::o1("test-api-key".to_string());
1508
1509        assert_eq!(provider.model(), MODEL_O1);
1510        assert_eq!(provider.provider(), "openai");
1511    }
1512
1513    #[test]
1514    fn test_gpt41_factory_creates_gpt41_provider() {
1515        let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1516
1517        assert_eq!(provider.model(), MODEL_GPT41);
1518        assert_eq!(provider.provider(), "openai");
1519    }
1520
1521    #[test]
1522    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1523        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1524
1525        assert_eq!(provider.model(), "kimi-custom");
1526        assert_eq!(provider.base_url, BASE_URL_KIMI);
1527        assert_eq!(provider.provider(), "openai");
1528    }
1529
1530    #[test]
1531    fn test_kimi_k2_5_factory_creates_provider() {
1532        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1533
1534        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1535        assert_eq!(provider.base_url, BASE_URL_KIMI);
1536        assert_eq!(provider.provider(), "openai");
1537    }
1538
1539    #[test]
1540    fn test_kimi_k2_thinking_factory_creates_provider() {
1541        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1542
1543        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1544        assert_eq!(provider.base_url, BASE_URL_KIMI);
1545        assert_eq!(provider.provider(), "openai");
1546    }
1547
1548    #[test]
1549    fn test_zai_factory_creates_provider_with_zai_base_url() {
1550        let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());
1551
1552        assert_eq!(provider.model(), "glm-custom");
1553        assert_eq!(provider.base_url, BASE_URL_ZAI);
1554        assert_eq!(provider.provider(), "openai");
1555    }
1556
1557    #[test]
1558    fn test_zai_glm5_factory_creates_provider() {
1559        let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());
1560
1561        assert_eq!(provider.model(), MODEL_ZAI_GLM5);
1562        assert_eq!(provider.base_url, BASE_URL_ZAI);
1563        assert_eq!(provider.provider(), "openai");
1564    }
1565
1566    #[test]
1567    fn test_minimax_factory_creates_provider_with_minimax_base_url() {
1568        let provider =
1569            OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());
1570
1571        assert_eq!(provider.model(), "minimax-custom");
1572        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1573        assert_eq!(provider.provider(), "openai");
1574    }
1575
1576    #[test]
1577    fn test_minimax_m2_5_factory_creates_provider() {
1578        let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());
1579
1580        assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
1581        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1582        assert_eq!(provider.provider(), "openai");
1583    }
1584
1585    // ===================
1586    // Model Constants Tests
1587    // ===================
1588
1589    #[test]
1590    fn test_model_constants_have_expected_values() {
1591        // GPT-5.4 / GPT-5.3 Codex
1592        assert_eq!(MODEL_GPT54, "gpt-5.4");
1593        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
1594        // GPT-5.2 series
1595        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
1596        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
1597        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
1598        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
1599        // GPT-5 series
1600        assert_eq!(MODEL_GPT5, "gpt-5");
1601        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
1602        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
1603        // o-series
1604        assert_eq!(MODEL_O3, "o3");
1605        assert_eq!(MODEL_O3_MINI, "o3-mini");
1606        assert_eq!(MODEL_O4_MINI, "o4-mini");
1607        assert_eq!(MODEL_O1, "o1");
1608        assert_eq!(MODEL_O1_MINI, "o1-mini");
1609        // GPT-4.1 series
1610        assert_eq!(MODEL_GPT41, "gpt-4.1");
1611        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
1612        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
1613        // GPT-4o series
1614        assert_eq!(MODEL_GPT4O, "gpt-4o");
1615        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
1616        // OpenAI-compatible vendor defaults
1617        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
1618        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
1619        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
1620        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
1621        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
1622        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
1623        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
1624    }
1625
1626    // ===================
1627    // Clone Tests
1628    // ===================
1629
1630    #[test]
1631    fn test_provider_is_cloneable() {
1632        let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
1633        let cloned = provider.clone();
1634
1635        assert_eq!(provider.model(), cloned.model());
1636        assert_eq!(provider.provider(), cloned.provider());
1637        assert_eq!(provider.base_url, cloned.base_url);
1638    }
1639
1640    // ===================
1641    // API Type Serialization Tests
1642    // ===================
1643
1644    #[test]
1645    fn test_api_role_serialization() {
1646        let system_role = ApiRole::System;
1647        let user_role = ApiRole::User;
1648        let assistant_role = ApiRole::Assistant;
1649        let tool_role = ApiRole::Tool;
1650
1651        assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
1652        assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
1653        assert_eq!(
1654            serde_json::to_string(&assistant_role).unwrap(),
1655            "\"assistant\""
1656        );
1657        assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
1658    }
1659
1660    #[test]
1661    fn test_api_message_serialization_simple() {
1662        let message = ApiMessage {
1663            role: ApiRole::User,
1664            content: Some("Hello, world!".to_string()),
1665            reasoning_content: None,
1666            tool_calls: None,
1667            tool_call_id: None,
1668        };
1669
1670        let json = serde_json::to_string(&message).unwrap();
1671        assert!(json.contains("\"role\":\"user\""));
1672        assert!(json.contains("\"content\":\"Hello, world!\""));
1673        // Optional fields should be omitted
1674        assert!(!json.contains("tool_calls"));
1675        assert!(!json.contains("tool_call_id"));
1676    }
1677
1678    #[test]
1679    fn test_api_message_serialization_with_tool_calls() {
1680        let message = ApiMessage {
1681            role: ApiRole::Assistant,
1682            content: Some("Let me help.".to_string()),
1683            reasoning_content: None,
1684            tool_calls: Some(vec![ApiToolCall {
1685                id: "call_123".to_string(),
1686                r#type: "function".to_string(),
1687                function: ApiFunctionCall {
1688                    name: "read_file".to_string(),
1689                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
1690                },
1691            }]),
1692            tool_call_id: None,
1693        };
1694
1695        let json = serde_json::to_string(&message).unwrap();
1696        assert!(json.contains("\"role\":\"assistant\""));
1697        assert!(json.contains("\"tool_calls\""));
1698        assert!(json.contains("\"id\":\"call_123\""));
1699        assert!(json.contains("\"type\":\"function\""));
1700        assert!(json.contains("\"name\":\"read_file\""));
1701    }
1702
1703    #[test]
1704    fn test_api_tool_message_serialization() {
1705        let message = ApiMessage {
1706            role: ApiRole::Tool,
1707            content: Some("File contents here".to_string()),
1708            reasoning_content: None,
1709            tool_calls: None,
1710            tool_call_id: Some("call_123".to_string()),
1711        };
1712
1713        let json = serde_json::to_string(&message).unwrap();
1714        assert!(json.contains("\"role\":\"tool\""));
1715        assert!(json.contains("\"tool_call_id\":\"call_123\""));
1716        assert!(json.contains("\"content\":\"File contents here\""));
1717    }
1718
1719    #[test]
1720    fn test_api_tool_serialization() {
1721        let tool = ApiTool {
1722            r#type: "function".to_string(),
1723            function: ApiFunction {
1724                name: "test_tool".to_string(),
1725                description: "A test tool".to_string(),
1726                parameters: serde_json::json!({
1727                    "type": "object",
1728                    "properties": {
1729                        "arg": {"type": "string"}
1730                    }
1731                }),
1732            },
1733        };
1734
1735        let json = serde_json::to_string(&tool).unwrap();
1736        assert!(json.contains("\"type\":\"function\""));
1737        assert!(json.contains("\"name\":\"test_tool\""));
1738        assert!(json.contains("\"description\":\"A test tool\""));
1739        assert!(json.contains("\"parameters\""));
1740    }
1741
1742    // ===================
1743    // API Type Deserialization Tests
1744    // ===================
1745
1746    #[test]
1747    fn test_api_response_deserialization() {
1748        let json = r#"{
1749            "id": "chatcmpl-123",
1750            "choices": [
1751                {
1752                    "message": {
1753                        "content": "Hello!"
1754                    },
1755                    "finish_reason": "stop"
1756                }
1757            ],
1758            "model": "gpt-4o",
1759            "usage": {
1760                "prompt_tokens": 100,
1761                "completion_tokens": 50
1762            }
1763        }"#;
1764
1765        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1766        assert_eq!(response.id, "chatcmpl-123");
1767        assert_eq!(response.model, "gpt-4o");
1768        assert_eq!(response.usage.prompt_tokens, 100);
1769        assert_eq!(response.usage.completion_tokens, 50);
1770        assert_eq!(response.choices.len(), 1);
1771        assert_eq!(
1772            response.choices[0].message.content,
1773            Some("Hello!".to_string())
1774        );
1775    }
1776
1777    #[test]
1778    fn test_api_response_with_tool_calls_deserialization() {
1779        let json = r#"{
1780            "id": "chatcmpl-456",
1781            "choices": [
1782                {
1783                    "message": {
1784                        "content": null,
1785                        "tool_calls": [
1786                            {
1787                                "id": "call_abc",
1788                                "type": "function",
1789                                "function": {
1790                                    "name": "read_file",
1791                                    "arguments": "{\"path\": \"test.txt\"}"
1792                                }
1793                            }
1794                        ]
1795                    },
1796                    "finish_reason": "tool_calls"
1797                }
1798            ],
1799            "model": "gpt-4o",
1800            "usage": {
1801                "prompt_tokens": 150,
1802                "completion_tokens": 30
1803            }
1804        }"#;
1805
1806        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1807        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
1808        assert_eq!(tool_calls.len(), 1);
1809        assert_eq!(tool_calls[0].id, "call_abc");
1810        assert_eq!(tool_calls[0].function.name, "read_file");
1811    }
1812
1813    #[test]
1814    fn test_api_response_with_unknown_finish_reason_deserialization() {
1815        let json = r#"{
1816            "id": "chatcmpl-789",
1817            "choices": [
1818                {
1819                    "message": {
1820                        "content": "ok"
1821                    },
1822                    "finish_reason": "vendor_custom_reason"
1823                }
1824            ],
1825            "model": "glm-5",
1826            "usage": {
1827                "prompt_tokens": 10,
1828                "completion_tokens": 5
1829            }
1830        }"#;
1831
1832        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1833        assert_eq!(
1834            response.choices[0].finish_reason.as_deref(),
1835            Some("vendor_custom_reason")
1836        );
1837        assert_eq!(
1838            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
1839            StopReason::StopSequence
1840        );
1841    }
1842
1843    #[test]
1844    fn test_map_finish_reason_covers_vendor_specific_values() {
1845        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
1846        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
1847        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
1848        assert_eq!(
1849            map_finish_reason("content_filter"),
1850            StopReason::StopSequence
1851        );
1852        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
1853        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
1854        assert_eq!(
1855            map_finish_reason("some_new_reason"),
1856            StopReason::StopSequence
1857        );
1858    }
1859
1860    // ===================
1861    // Message Conversion Tests
1862    // ===================
1863
1864    #[test]
1865    fn test_build_api_messages_with_system() {
1866        let request = ChatRequest {
1867            system: "You are helpful.".to_string(),
1868            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
1869            tools: None,
1870            max_tokens: 1024,
1871            max_tokens_explicit: true,
1872            session_id: None,
1873            cached_content: None,
1874            thinking: None,
1875            tool_choice: None,
1876            response_format: None,
1877        };
1878
1879        let api_messages = build_api_messages(&request);
1880        assert_eq!(api_messages.len(), 2);
1881        assert_eq!(api_messages[0].role, ApiRole::System);
1882        assert_eq!(
1883            api_messages[0].content,
1884            Some("You are helpful.".to_string())
1885        );
1886        assert_eq!(api_messages[1].role, ApiRole::User);
1887        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
1888    }
1889
1890    #[test]
1891    fn test_build_api_messages_empty_system() {
1892        let request = ChatRequest {
1893            system: String::new(),
1894            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
1895            tools: None,
1896            max_tokens: 1024,
1897            max_tokens_explicit: true,
1898            session_id: None,
1899            cached_content: None,
1900            thinking: None,
1901            tool_choice: None,
1902            response_format: None,
1903        };
1904
1905        let api_messages = build_api_messages(&request);
1906        assert_eq!(api_messages.len(), 1);
1907        assert_eq!(api_messages[0].role, ApiRole::User);
1908    }
1909
1910    fn request_with_messages(messages: Vec<agent_sdk_foundation::llm::Message>) -> ChatRequest {
1911        ChatRequest {
1912            system: String::new(),
1913            messages,
1914            tools: None,
1915            max_tokens: 1024,
1916            max_tokens_explicit: true,
1917            session_id: None,
1918            cached_content: None,
1919            thinking: None,
1920            tool_choice: None,
1921            response_format: None,
1922        }
1923    }
1924
1925    #[test]
1926    fn test_build_api_messages_echoes_assistant_reasoning_content_on_tool_call()
1927    -> anyhow::Result<()> {
1928        // DeepSeek V4 thinking-mode requires the prior assistant turn's
1929        // reasoning to be echoed back as `reasoning_content` ONLY on a turn
1930        // that also performed a tool call, or the API 400s.
1931        let request = request_with_messages(vec![
1932            agent_sdk_foundation::llm::Message::user("What is the weather?"),
1933            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
1934                ContentBlock::Thinking {
1935                    thinking: "I should call the weather tool.".to_string(),
1936                    signature: None,
1937                },
1938                ContentBlock::ToolUse {
1939                    id: "call_1".to_string(),
1940                    name: "get_weather".to_string(),
1941                    input: serde_json::json!({"city": "Paris"}),
1942                    thought_signature: None,
1943                },
1944            ]),
1945        ]);
1946
1947        let api_messages = build_api_messages(&request);
1948        let assistant = api_messages
1949            .iter()
1950            .find(|m| m.role == ApiRole::Assistant)
1951            .context("assistant message present")?;
1952        assert!(assistant.tool_calls.is_some());
1953        assert_eq!(
1954            assistant.reasoning_content,
1955            Some("I should call the weather tool.".to_string())
1956        );
1957        Ok(())
1958    }
1959
1960    #[test]
1961    fn test_build_api_messages_reasoning_content_serializes_on_tool_call_turn() -> anyhow::Result<()>
1962    {
1963        let request = request_with_messages(vec![
1964            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
1965                ContentBlock::Thinking {
1966                    thinking: "thinking out loud".to_string(),
1967                    signature: None,
1968                },
1969                ContentBlock::ToolUse {
1970                    id: "call_1".to_string(),
1971                    name: "do_thing".to_string(),
1972                    input: serde_json::json!({}),
1973                    thought_signature: None,
1974                },
1975            ]),
1976        ]);
1977
1978        let api_messages = build_api_messages(&request);
1979        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
1980        assert!(json.contains("\"reasoning_content\":\"thinking out loud\""));
1981        Ok(())
1982    }
1983
1984    #[test]
1985    fn test_build_api_messages_reasoning_only_turn_is_not_echoed() -> anyhow::Result<()> {
1986        // A reasoning-only assistant turn (no visible text, no tool call) must
1987        // NOT carry reasoning_content: legacy `deepseek-reasoner` 400s if
1988        // reasoning_content appears in input, and DeepSeek V4 thinking-mode only
1989        // needs it on tool-call turns. With no other payload the turn collapses
1990        // to nothing and is dropped entirely.
1991        let request = request_with_messages(vec![
1992            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
1993                ContentBlock::Thinking {
1994                    thinking: "pondering".to_string(),
1995                    signature: None,
1996                },
1997            ]),
1998        ]);
1999
2000        let api_messages = build_api_messages(&request);
2001        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2002        assert!(!json.contains("reasoning_content"));
2003        assert!(api_messages.is_empty());
2004        Ok(())
2005    }
2006
2007    #[test]
2008    fn test_build_api_messages_reasoning_with_text_no_tool_call_is_not_echoed() -> anyhow::Result<()>
2009    {
2010        // An assistant turn carrying reasoning + visible text but NO tool call
2011        // is emitted for its text, but its reasoning is NOT echoed back.
2012        let request = request_with_messages(vec![
2013            agent_sdk_foundation::llm::Message::user("What is 2+2?"),
2014            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
2015                ContentBlock::Thinking {
2016                    thinking: "Let me add 2 and 2.".to_string(),
2017                    signature: None,
2018                },
2019                ContentBlock::Text {
2020                    text: "4".to_string(),
2021                },
2022            ]),
2023            agent_sdk_foundation::llm::Message::user("And 3+3?"),
2024        ]);
2025
2026        let api_messages = build_api_messages(&request);
2027        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2028        assert!(!json.contains("reasoning_content"));
2029        let assistant = api_messages
2030            .iter()
2031            .find(|m| m.role == ApiRole::Assistant)
2032            .context("assistant message present")?;
2033        assert_eq!(assistant.content, Some("4".to_string()));
2034        assert_eq!(assistant.reasoning_content, None);
2035        Ok(())
2036    }
2037
2038    #[test]
2039    fn test_build_api_messages_normal_path_has_no_reasoning_content() -> anyhow::Result<()> {
2040        // Normal path unchanged: an assistant turn with no Thinking block must
2041        // not attach reasoning_content.
2042        let request = request_with_messages(vec![
2043            agent_sdk_foundation::llm::Message::user("hi"),
2044            agent_sdk_foundation::llm::Message::assistant_with_content(vec![ContentBlock::Text {
2045                text: "hello".to_string(),
2046            }]),
2047        ]);
2048
2049        let api_messages = build_api_messages(&request);
2050        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2051        assert!(!json.contains("reasoning_content"));
2052        let assistant = api_messages
2053            .iter()
2054            .find(|m| m.role == ApiRole::Assistant)
2055            .context("assistant message present")?;
2056        assert_eq!(assistant.reasoning_content, None);
2057        Ok(())
2058    }
2059
2060    #[test]
2061    fn test_build_api_messages_does_not_attach_reasoning_to_user_blocks() {
2062        // A user turn carrying a Thinking block (unusual, but possible) must not
2063        // be turned into a reasoning_content echo.
2064        let request =
2065            request_with_messages(vec![agent_sdk_foundation::llm::Message::user_with_content(
2066                vec![
2067                    ContentBlock::Thinking {
2068                        thinking: "user-side thinking".to_string(),
2069                        signature: None,
2070                    },
2071                    ContentBlock::Text {
2072                        text: "question".to_string(),
2073                    },
2074                ],
2075            )]);
2076
2077        let api_messages = build_api_messages(&request);
2078        assert_eq!(api_messages.len(), 1);
2079        assert_eq!(api_messages[0].role, ApiRole::User);
2080        assert_eq!(api_messages[0].reasoning_content, None);
2081    }
2082
2083    #[test]
2084    fn test_convert_tool() {
2085        let tool = agent_sdk_foundation::llm::Tool {
2086            name: "test_tool".to_string(),
2087            description: "A test tool".to_string(),
2088            input_schema: serde_json::json!({"type": "object"}),
2089            display_name: "Test Tool".to_string(),
2090            tier: agent_sdk_foundation::ToolTier::Observe,
2091        };
2092
2093        let api_tool = convert_tool(tool);
2094        assert_eq!(api_tool.r#type, "function");
2095        assert_eq!(api_tool.function.name, "test_tool");
2096        assert_eq!(api_tool.function.description, "A test tool");
2097    }
2098
2099    #[test]
2100    fn test_build_content_blocks_text_only() {
2101        let message = ApiResponseMessage {
2102            content: Some("Hello!".to_string()),
2103            tool_calls: None,
2104            reasoning_content: None,
2105            reasoning: None,
2106        };
2107
2108        let blocks = build_content_blocks(&message);
2109        assert_eq!(blocks.len(), 1);
2110        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
2111    }
2112
2113    #[test]
2114    fn test_build_content_blocks_with_tool_calls() {
2115        let message = ApiResponseMessage {
2116            content: Some("Let me help.".to_string()),
2117            tool_calls: Some(vec![ApiResponseToolCall {
2118                id: "call_123".to_string(),
2119                function: ApiResponseFunctionCall {
2120                    name: "read_file".to_string(),
2121                    arguments: "{\"path\": \"test.txt\"}".to_string(),
2122                },
2123            }]),
2124            reasoning_content: None,
2125            reasoning: None,
2126        };
2127
2128        let blocks = build_content_blocks(&message);
2129        assert_eq!(blocks.len(), 2);
2130        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
2131        assert!(
2132            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
2133        );
2134    }
2135
2136    #[test]
2137    fn test_build_content_blocks_falls_back_to_reasoning_content_when_content_empty() {
2138        // DeepSeek-style: answer / usable output arrives in reasoning_content
2139        // while content is null. Without the fallback this dropped all output.
2140        let message = ApiResponseMessage {
2141            content: None,
2142            tool_calls: None,
2143            reasoning_content: Some("The answer is 42.".to_string()),
2144            reasoning: None,
2145        };
2146
2147        let blocks = build_content_blocks(&message);
2148        assert_eq!(blocks.len(), 1);
2149        assert!(
2150            matches!(&blocks[0], ContentBlock::Thinking { thinking, signature } if thinking == "The answer is 42." && signature.is_none())
2151        );
2152    }
2153
2154    #[test]
2155    fn test_build_content_blocks_falls_back_to_reasoning_field() {
2156        // Some OpenRouter upstreams normalize reasoning under `reasoning`.
2157        let message = ApiResponseMessage {
2158            content: Some(String::new()),
2159            tool_calls: None,
2160            reasoning_content: None,
2161            reasoning: Some("Considering options...".to_string()),
2162        };
2163
2164        let blocks = build_content_blocks(&message);
2165        assert_eq!(blocks.len(), 1);
2166        assert!(
2167            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "Considering options...")
2168        );
2169    }
2170
2171    #[test]
2172    fn test_build_content_blocks_prefers_reasoning_content_over_reasoning() {
2173        let message = ApiResponseMessage {
2174            content: None,
2175            tool_calls: None,
2176            reasoning_content: Some("primary".to_string()),
2177            reasoning: Some("secondary".to_string()),
2178        };
2179
2180        let blocks = build_content_blocks(&message);
2181        assert_eq!(blocks.len(), 1);
2182        assert!(
2183            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "primary")
2184        );
2185    }
2186
2187    #[test]
2188    fn test_build_content_blocks_does_not_add_reasoning_when_content_present() {
2189        // The normal content-present case must be unchanged: reasoning is NOT
2190        // surfaced as a Thinking block when there is usable text content.
2191        let message = ApiResponseMessage {
2192            content: Some("Final answer.".to_string()),
2193            tool_calls: None,
2194            reasoning_content: Some("internal chain of thought".to_string()),
2195            reasoning: None,
2196        };
2197
2198        let blocks = build_content_blocks(&message);
2199        assert_eq!(blocks.len(), 1);
2200        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Final answer."));
2201    }
2202
2203    #[test]
2204    fn test_build_content_blocks_reasoning_fallback_with_tool_calls() {
2205        // Empty content + reasoning + a tool call: surface the reasoning AND the
2206        // tool call (reasoning model under tight max_tokens that still tool-called).
2207        let message = ApiResponseMessage {
2208            content: None,
2209            tool_calls: Some(vec![ApiResponseToolCall {
2210                id: "call_1".to_string(),
2211                function: ApiResponseFunctionCall {
2212                    name: "search".to_string(),
2213                    arguments: "{}".to_string(),
2214                },
2215            }]),
2216            reasoning_content: Some("I should search.".to_string()),
2217            reasoning: None,
2218        };
2219
2220        let blocks = build_content_blocks(&message);
2221        assert_eq!(blocks.len(), 2);
2222        assert!(
2223            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "I should search.")
2224        );
2225        assert!(matches!(&blocks[1], ContentBlock::ToolUse { name, .. } if name == "search"));
2226    }
2227
2228    #[test]
2229    fn test_build_content_blocks_empty_message_yields_no_blocks() {
2230        // Genuine truncation with no reasoning text: still produce nothing
2231        // (behavior unchanged for the empty case).
2232        let message = ApiResponseMessage {
2233            content: None,
2234            tool_calls: None,
2235            reasoning_content: None,
2236            reasoning: None,
2237        };
2238
2239        let blocks = build_content_blocks(&message);
2240        assert!(blocks.is_empty());
2241    }
2242
2243    #[test]
2244    fn test_api_response_message_deserializes_reasoning_content() {
2245        let json = r#"{
2246            "content": null,
2247            "reasoning_content": "step by step"
2248        }"#;
2249
2250        let message: ApiResponseMessage = serde_json::from_str(json).unwrap();
2251        assert_eq!(reasoning_text(&message), Some("step by step"));
2252        assert!(message.content.is_none());
2253    }
2254
2255    // ===================
2256    // SSE Streaming Type Tests
2257    // ===================
2258
2259    #[test]
2260    fn test_sse_chunk_text_delta_deserialization() {
2261        let json = r#"{
2262            "choices": [{
2263                "delta": {
2264                    "content": "Hello"
2265                },
2266                "finish_reason": null
2267            }]
2268        }"#;
2269
2270        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2271        assert_eq!(chunk.choices.len(), 1);
2272        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
2273        assert!(chunk.choices[0].finish_reason.is_none());
2274    }
2275
2276    #[test]
2277    fn test_sse_chunk_tool_call_delta_deserialization() {
2278        let json = r#"{
2279            "choices": [{
2280                "delta": {
2281                    "tool_calls": [{
2282                        "index": 0,
2283                        "id": "call_abc",
2284                        "function": {
2285                            "name": "read_file",
2286                            "arguments": ""
2287                        }
2288                    }]
2289                },
2290                "finish_reason": null
2291            }]
2292        }"#;
2293
2294        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2295        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
2296        assert_eq!(tool_calls.len(), 1);
2297        assert_eq!(tool_calls[0].index, 0);
2298        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
2299        assert_eq!(
2300            tool_calls[0].function.as_ref().unwrap().name,
2301            Some("read_file".to_string())
2302        );
2303    }
2304
2305    #[test]
2306    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
2307        let json = r#"{
2308            "choices": [{
2309                "delta": {
2310                    "tool_calls": [{
2311                        "index": 0,
2312                        "function": {
2313                            "arguments": "{\"path\":"
2314                        }
2315                    }]
2316                },
2317                "finish_reason": null
2318            }]
2319        }"#;
2320
2321        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2322        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
2323        assert_eq!(tool_calls[0].id, None);
2324        assert_eq!(
2325            tool_calls[0].function.as_ref().unwrap().arguments,
2326            Some("{\"path\":".to_string())
2327        );
2328    }
2329
2330    #[test]
2331    fn test_sse_chunk_with_finish_reason_deserialization() {
2332        let json = r#"{
2333            "choices": [{
2334                "delta": {},
2335                "finish_reason": "stop"
2336            }]
2337        }"#;
2338
2339        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2340        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
2341    }
2342
2343    #[test]
2344    fn test_sse_chunk_with_usage_deserialization() {
2345        let json = r#"{
2346            "choices": [{
2347                "delta": {},
2348                "finish_reason": "stop"
2349            }],
2350            "usage": {
2351                "prompt_tokens": 100,
2352                "completion_tokens": 50
2353            }
2354        }"#;
2355
2356        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2357        let usage = chunk.usage.unwrap();
2358        assert_eq!(usage.prompt_tokens, 100);
2359        assert_eq!(usage.completion_tokens, 50);
2360    }
2361
2362    #[test]
2363    fn test_sse_chunk_with_float_usage_deserialization() {
2364        let json = r#"{
2365            "choices": [{
2366                "delta": {},
2367                "finish_reason": "stop"
2368            }],
2369            "usage": {
2370                "prompt_tokens": 100.0,
2371                "completion_tokens": 50.0
2372            }
2373        }"#;
2374
2375        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2376        let usage = chunk.usage.unwrap();
2377        assert_eq!(usage.prompt_tokens, 100);
2378        assert_eq!(usage.completion_tokens, 50);
2379    }
2380
2381    #[test]
2382    fn test_api_usage_deserializes_integer_compatible_numbers() {
2383        let json = r#"{
2384            "prompt_tokens": 42.0,
2385            "completion_tokens": 7
2386        }"#;
2387
2388        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2389        assert_eq!(usage.prompt_tokens, 42);
2390        assert_eq!(usage.completion_tokens, 7);
2391    }
2392
2393    #[test]
2394    fn test_api_usage_deserializes_cached_tokens() {
2395        let json = r#"{
2396            "prompt_tokens": 42,
2397            "completion_tokens": 7,
2398            "prompt_tokens_details": {
2399                "cached_tokens": 10
2400            }
2401        }"#;
2402
2403        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2404        assert_eq!(usage.prompt_tokens, 42);
2405        assert_eq!(usage.completion_tokens, 7);
2406        assert_eq!(usage.prompt_tokens_details.unwrap().cached_tokens, 10);
2407    }
2408
2409    #[test]
2410    fn test_process_sse_data_maps_cached_tokens_to_cache_read_usage() {
2411        let results = process_sse_data(
2412            r#"{
2413                "choices": [],
2414                "usage": {
2415                    "prompt_tokens": 42,
2416                    "completion_tokens": 7,
2417                    "prompt_tokens_details": {
2418                        "cached_tokens": 10
2419                    }
2420                }
2421            }"#,
2422        );
2423
2424        assert!(matches!(
2425            results.as_slice(),
2426            [SseProcessResult::Usage(Usage {
2427                input_tokens: 42,
2428                output_tokens: 7,
2429                cached_input_tokens: 10,
2430                cache_creation_input_tokens: 0,
2431            })]
2432        ));
2433    }
2434
2435    #[test]
2436    fn test_sse_delta_deserializes_reasoning_fields() -> anyhow::Result<()> {
2437        // The streaming delta struct must accept DeepSeek `reasoning_content`
2438        // and OpenRouter-normalized `reasoning` so reasoning tokens are not
2439        // dropped on deserialization.
2440        let chunk: SseChunk = serde_json::from_str(
2441            r#"{
2442                "choices": [{
2443                    "delta": {
2444                        "reasoning_content": "step one"
2445                    },
2446                    "finish_reason": null
2447                }]
2448            }"#,
2449        )
2450        .context("deserialize sse chunk")?;
2451        assert_eq!(
2452            chunk.choices[0].delta.reasoning_content,
2453            Some("step one".to_string())
2454        );
2455        assert!(chunk.choices[0].delta.content.is_none());
2456        Ok(())
2457    }
2458
2459    #[test]
2460    fn test_process_sse_data_emits_thinking_delta_from_reasoning_content() {
2461        // Reasoning-model fallback under streaming: a delta whose visible
2462        // `content` is absent but whose `reasoning_content` carries tokens must
2463        // surface as a ThinkingDelta, mirroring the non-streaming fallback so the
2464        // output is not silently dropped.
2465        let results = process_sse_data(
2466            r#"{
2467                "choices": [{
2468                    "delta": { "reasoning_content": "thinking..." },
2469                    "finish_reason": null
2470                }]
2471            }"#,
2472        );
2473
2474        assert!(matches!(
2475            results.as_slice(),
2476            [SseProcessResult::ThinkingDelta(text)] if text == "thinking..."
2477        ));
2478    }
2479
2480    #[test]
2481    fn test_process_sse_data_emits_thinking_delta_from_reasoning_field() {
2482        // OpenRouter-normalized `reasoning` field is an equivalent fallback.
2483        let results = process_sse_data(
2484            r#"{
2485                "choices": [{
2486                    "delta": { "reasoning": "pondering" },
2487                    "finish_reason": null
2488                }]
2489            }"#,
2490        );
2491
2492        assert!(matches!(
2493            results.as_slice(),
2494            [SseProcessResult::ThinkingDelta(text)] if text == "pondering"
2495        ));
2496    }
2497
2498    #[test]
2499    fn test_process_sse_data_prefers_text_content_over_reasoning() {
2500        // When visible `content` is present, it takes precedence and the
2501        // reasoning fallback does not fire (mirrors non-streaming behavior).
2502        let results = process_sse_data(
2503            r#"{
2504                "choices": [{
2505                    "delta": {
2506                        "content": "answer",
2507                        "reasoning_content": "ignored"
2508                    },
2509                    "finish_reason": null
2510                }]
2511            }"#,
2512        );
2513
2514        assert!(matches!(
2515            results.as_slice(),
2516            [SseProcessResult::TextDelta(text)] if text == "answer"
2517        ));
2518    }
2519
2520    #[test]
2521    fn test_process_sse_data_empty_content_falls_back_to_reasoning() {
2522        // An explicitly empty `content` string must still trigger the reasoning
2523        // fallback rather than emitting an empty TextDelta.
2524        let results = process_sse_data(
2525            r#"{
2526                "choices": [{
2527                    "delta": {
2528                        "content": "",
2529                        "reasoning_content": "fallback"
2530                    },
2531                    "finish_reason": null
2532                }]
2533            }"#,
2534        );
2535
2536        assert!(matches!(
2537            results.as_slice(),
2538            [SseProcessResult::ThinkingDelta(text)] if text == "fallback"
2539        ));
2540    }
2541
2542    #[test]
2543    fn test_api_usage_rejects_fractional_numbers() {
2544        let json = r#"{
2545            "prompt_tokens": 42.5,
2546            "completion_tokens": 7
2547        }"#;
2548
2549        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
2550        assert!(usage.is_err());
2551    }
2552
2553    #[test]
2554    fn test_use_max_tokens_alias_for_vendor_urls() {
2555        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
2556        assert!(use_max_tokens_alias(BASE_URL_KIMI));
2557        assert!(use_max_tokens_alias(BASE_URL_ZAI));
2558        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
2559    }
2560
2561    #[test]
2562    fn test_requires_responses_api_only_for_legacy_codex_model() {
2563        assert!(requires_responses_api(MODEL_GPT52_CODEX));
2564        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
2565        assert!(!requires_responses_api(MODEL_GPT54));
2566    }
2567
2568    #[test]
2569    fn test_should_use_responses_api_for_official_agentic_requests() {
2570        let request = ChatRequest {
2571            system: String::new(),
2572            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
2573            tools: Some(vec![agent_sdk_foundation::llm::Tool {
2574                name: "read_file".to_string(),
2575                description: "Read a file".to_string(),
2576                input_schema: serde_json::json!({"type": "object"}),
2577                display_name: "Read File".to_string(),
2578                tier: agent_sdk_foundation::ToolTier::Observe,
2579            }]),
2580            max_tokens: 1024,
2581            max_tokens_explicit: true,
2582            session_id: Some("thread-1".to_string()),
2583            cached_content: None,
2584            thinking: None,
2585            tool_choice: None,
2586            response_format: None,
2587        };
2588
2589        assert!(should_use_responses_api(
2590            DEFAULT_BASE_URL,
2591            MODEL_GPT54,
2592            &request
2593        ));
2594        assert!(!should_use_responses_api(
2595            BASE_URL_KIMI,
2596            MODEL_GPT54,
2597            &request
2598        ));
2599    }
2600
2601    #[test]
2602    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
2603        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
2604        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
2605    }
2606
2607    #[test]
2608    fn test_build_api_reasoning_uses_explicit_effort() {
2609        let reasoning =
2610            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
2611        assert!(matches!(reasoning.effort, ReasoningEffort::High));
2612    }
2613
2614    #[test]
2615    fn test_build_api_reasoning_omits_adaptive_without_effort() {
2616        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
2617    }
2618
2619    #[test]
2620    fn test_openai_rejects_adaptive_thinking() {
2621        let provider = OpenAIProvider::gpt54("test-key".to_string());
2622        let error = provider
2623            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
2624            .unwrap_err();
2625        assert!(
2626            error
2627                .to_string()
2628                .contains("adaptive thinking is not supported")
2629        );
2630    }
2631
2632    #[test]
2633    fn test_openai_non_reasoning_models_reject_thinking() {
2634        let provider = OpenAIProvider::gpt4o("test-key".to_string());
2635        let error = provider
2636            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
2637            .unwrap_err();
2638        assert!(error.to_string().contains("thinking is not supported"));
2639    }
2640
2641    #[test]
2642    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
2643        let messages = vec![ApiMessage {
2644            role: ApiRole::User,
2645            content: Some("Hello".to_string()),
2646            reasoning_content: None,
2647            tool_calls: None,
2648            tool_call_id: None,
2649        }];
2650
2651        let request = ApiChatRequest {
2652            model: "gpt-4o",
2653            messages: &messages,
2654            max_completion_tokens: Some(1024),
2655            max_tokens: None,
2656            tools: None,
2657            tool_choice: None,
2658            reasoning: None,
2659            response_format: None,
2660        };
2661
2662        let json = serde_json::to_string(&request).unwrap();
2663        assert!(json.contains("\"max_completion_tokens\":1024"));
2664        assert!(!json.contains("\"max_tokens\""));
2665    }
2666
2667    #[test]
2668    fn test_request_serialization_with_max_tokens_alias() {
2669        let messages = vec![ApiMessage {
2670            role: ApiRole::User,
2671            content: Some("Hello".to_string()),
2672            reasoning_content: None,
2673            tool_calls: None,
2674            tool_call_id: None,
2675        }];
2676
2677        let request = ApiChatRequest {
2678            model: "glm-5",
2679            messages: &messages,
2680            max_completion_tokens: Some(1024),
2681            max_tokens: Some(1024),
2682            tools: None,
2683            tool_choice: None,
2684            reasoning: None,
2685            response_format: None,
2686        };
2687
2688        let json = serde_json::to_string(&request).unwrap();
2689        assert!(json.contains("\"max_completion_tokens\":1024"));
2690        assert!(json.contains("\"max_tokens\":1024"));
2691    }
2692
2693    #[test]
2694    fn test_streaming_request_serialization_openai_default() {
2695        let messages = vec![ApiMessage {
2696            role: ApiRole::User,
2697            content: Some("Hello".to_string()),
2698            reasoning_content: None,
2699            tool_calls: None,
2700            tool_call_id: None,
2701        }];
2702
2703        let request = ApiChatRequestStreaming {
2704            model: "gpt-4o",
2705            messages: &messages,
2706            max_completion_tokens: Some(1024),
2707            max_tokens: None,
2708            tools: None,
2709            tool_choice: None,
2710            reasoning: None,
2711            response_format: None,
2712            stream_options: Some(ApiStreamOptions {
2713                include_usage: true,
2714            }),
2715            stream: true,
2716        };
2717
2718        let json = serde_json::to_string(&request).unwrap();
2719        assert!(json.contains("\"stream\":true"));
2720        assert!(json.contains("\"model\":\"gpt-4o\""));
2721        assert!(json.contains("\"max_completion_tokens\":1024"));
2722        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2723        assert!(!json.contains("\"max_tokens\""));
2724    }
2725
2726    #[test]
2727    fn test_streaming_request_serialization_with_max_tokens_alias() {
2728        let messages = vec![ApiMessage {
2729            role: ApiRole::User,
2730            content: Some("Hello".to_string()),
2731            reasoning_content: None,
2732            tool_calls: None,
2733            tool_call_id: None,
2734        }];
2735
2736        let request = ApiChatRequestStreaming {
2737            model: "kimi-k2-thinking",
2738            messages: &messages,
2739            max_completion_tokens: Some(1024),
2740            max_tokens: Some(1024),
2741            tools: None,
2742            tool_choice: None,
2743            reasoning: None,
2744            response_format: None,
2745            stream_options: None,
2746            stream: true,
2747        };
2748
2749        let json = serde_json::to_string(&request).unwrap();
2750        assert!(json.contains("\"max_completion_tokens\":1024"));
2751        assert!(json.contains("\"max_tokens\":1024"));
2752        assert!(!json.contains("\"stream_options\""));
2753    }
2754
2755    #[test]
2756    fn test_request_serialization_includes_reasoning_when_present() {
2757        let messages = vec![ApiMessage {
2758            role: ApiRole::User,
2759            content: Some("Hello".to_string()),
2760            reasoning_content: None,
2761            tool_calls: None,
2762            tool_call_id: None,
2763        }];
2764
2765        let request = ApiChatRequest {
2766            model: MODEL_GPT54,
2767            messages: &messages,
2768            max_completion_tokens: Some(1024),
2769            max_tokens: None,
2770            tools: None,
2771            tool_choice: None,
2772            reasoning: Some(ApiReasoning {
2773                effort: ReasoningEffort::High,
2774            }),
2775            response_format: None,
2776        };
2777
2778        let json = serde_json::to_string(&request).unwrap();
2779        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
2780    }
2781
2782    #[test]
2783    fn test_response_format_serializes_as_json_schema() {
2784        let messages = vec![ApiMessage {
2785            role: ApiRole::User,
2786            content: Some("Hello".to_string()),
2787            reasoning_content: None,
2788            tool_calls: None,
2789            tool_call_id: None,
2790        }];
2791
2792        let response_format = Some(ApiResponseFormat::from_response_format(
2793            &agent_sdk_foundation::llm::ResponseFormat::new(
2794                "person",
2795                serde_json::json!({"type": "object"}),
2796            ),
2797        ));
2798
2799        let request = ApiChatRequest {
2800            model: "gpt-4o",
2801            messages: &messages,
2802            max_completion_tokens: Some(1024),
2803            max_tokens: None,
2804            tools: None,
2805            tool_choice: None,
2806            reasoning: None,
2807            response_format,
2808        };
2809
2810        let json = serde_json::to_value(&request).unwrap();
2811        assert_eq!(json["response_format"]["type"], "json_schema");
2812        assert_eq!(json["response_format"]["json_schema"]["name"], "person");
2813        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
2814        assert_eq!(
2815            json["response_format"]["json_schema"]["schema"]["type"],
2816            "object"
2817        );
2818    }
2819
2820    #[test]
2821    fn test_response_format_omitted_when_absent() {
2822        let messages = vec![ApiMessage {
2823            role: ApiRole::User,
2824            content: Some("Hello".to_string()),
2825            reasoning_content: None,
2826            tool_calls: None,
2827            tool_call_id: None,
2828        }];
2829
2830        let request = ApiChatRequest {
2831            model: "gpt-4o",
2832            messages: &messages,
2833            max_completion_tokens: Some(1024),
2834            max_tokens: None,
2835            tools: None,
2836            tool_choice: None,
2837            reasoning: None,
2838            response_format: None,
2839        };
2840
2841        let json = serde_json::to_string(&request).unwrap();
2842        assert!(!json.contains("response_format"));
2843    }
2844}