Skip to main content

agent_sdk_providers/impls/
openai.rs

1//! `OpenAI` API provider implementation.
2//!
3//! This module provides an implementation of `LlmProvider` for the `OpenAI`
4//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
5//! via the `with_base_url` constructor.
6//!
7//! # Transparent Responses-API reroute
8//!
9//! Some requests cannot be served by Chat Completions and are transparently
10//! rerouted to the `OpenAI` Responses API
11//! ([`OpenAIResponsesProvider`]). The reroute (`should_use_responses_api`) fires
12//! when:
13//!
14//! - the model only exists on the Responses surface (e.g. `gpt-5.2-codex`), or
15//! - the request carries attachments (images / documents), or
16//! - the request is *agentic* (has tools or tool-use/tool-result blocks) against
17//!   the official `api.openai.com` base URL.
18//!
19//! The reroute forwards the provider's pooled HTTP client and `extra_headers`
20//! (the BYOK / gateway auth mechanism) so a rerouted request keeps connection
21//! reuse and authenticates identically to a non-rerouted one.
22
23use crate::attachments::{request_has_attachments, validate_request_attachments};
24use crate::provider::LlmProvider;
25use crate::streaming::{SseLineBuffer, StreamBox, StreamDelta, StreamErrorKind};
26use agent_sdk_foundation::llm::{
27    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, StopReason,
28    ThinkingConfig, ThinkingMode, Usage,
29};
30use anyhow::Result;
31use async_trait::async_trait;
32use futures::StreamExt;
33use reqwest::StatusCode;
34use serde::de::Error as _;
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37
38use super::openai_responses::OpenAIResponsesProvider;
39
/// Root of the official `OpenAI` REST API; used whenever no custom base URL
/// is supplied.
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
41
42/// Build an HTTP client with connect/keepalive timeouts matching the sibling
43/// providers (`anthropic`, `vertex`). A bare `reqwest::Client::new()` has no
44/// connect timeout, so a black-holed connect would wedge `chat`/`chat_stream`
45/// forever.
46fn build_http_client() -> reqwest::Client {
47    reqwest::Client::builder()
48        .connect_timeout(std::time::Duration::from_secs(30))
49        .tcp_keepalive(std::time::Duration::from_secs(30))
50        .build()
51        .unwrap_or_default()
52}
53
/// Report whether `model` is only served by the Responses API, i.e. cannot be
/// reached through Chat Completions at all.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
58
/// Report whether `base_url` points at the official `OpenAI` API host.
///
/// Only the host component of the URL is inspected. It must be exactly
/// `api.openai.com` or a subdomain of it (e.g. a regional endpoint such as
/// `eu.api.openai.com`); the comparison is ASCII case-insensitive and ignores
/// any scheme, userinfo, port, path, query, or fragment.
///
/// A plain substring test is deliberately avoided: it would classify
/// lookalike hosts (`api.openai.com.evil.example`) and URLs that merely
/// mention the name in their path (`https://proxy.example/api.openai.com/v1`)
/// as official, and reroute their agentic traffic to a Responses endpoint
/// those servers may not implement.
fn is_official_openai_base_url(base_url: &str) -> bool {
    const OFFICIAL_HOST: &str = "api.openai.com";

    // Drop the scheme when present, then keep only the authority section.
    let without_scheme = base_url
        .split_once("://")
        .map_or(base_url, |(_, rest)| rest);
    let authority = without_scheme
        .split(['/', '?', '#'])
        .next()
        .unwrap_or_default();

    // Strip optional `user:pass@` userinfo and a trailing `:port`.
    let host_and_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    let host = host_and_port.split(':').next().unwrap_or_default();

    // Host names are case-insensitive and may carry a trailing root dot.
    let host = host.trim_end_matches('.').to_ascii_lowercase();

    host == OFFICIAL_HOST
        || host
            .strip_suffix(OFFICIAL_HOST)
            .is_some_and(|prefix| prefix.ends_with('.'))
}
62
63fn request_is_agentic(request: &ChatRequest) -> bool {
64    request
65        .tools
66        .as_ref()
67        .is_some_and(|tools| !tools.is_empty()) || request.messages.iter().any(|message| {
68        matches!(
69            &message.content,
70            Content::Blocks(blocks)
71                if blocks.iter().any(|block| {
72                    matches!(block, ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. })
73                })
74        )
75    })
76}
77
78fn should_use_responses_api(base_url: &str, model: &str, request: &ChatRequest) -> bool {
79    requires_responses_api(model)
80        || request_has_attachments(request)
81        || (is_official_openai_base_url(base_url) && request_is_agentic(request))
82}
83
/// GPT-5.4 model identifier.
pub const MODEL_GPT54: &str = "gpt-5.4";

/// GPT-5.3 Codex model identifier.
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

/// GPT-5.2 Instant model identifier.
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
/// GPT-5.2 Thinking model identifier.
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
/// GPT-5.2 Pro model identifier.
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
/// GPT-5.2 Codex model identifier (served only by the Responses API).
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

/// GPT-5 model identifier (400k context).
pub const MODEL_GPT5: &str = "gpt-5";
/// GPT-5-mini model identifier (400k context series).
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
/// GPT-5-nano model identifier (400k context series).
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

/// o3 reasoning model identifier.
pub const MODEL_O3: &str = "o3";
/// o3-mini reasoning model identifier.
pub const MODEL_O3_MINI: &str = "o3-mini";
/// o4-mini reasoning model identifier.
pub const MODEL_O4_MINI: &str = "o4-mini";
/// o1 reasoning model identifier.
pub const MODEL_O1: &str = "o1";
/// o1-mini reasoning model identifier.
pub const MODEL_O1_MINI: &str = "o1-mini";

/// GPT-4.1 model identifier (improved instruction following, 1M context).
pub const MODEL_GPT41: &str = "gpt-4.1";
/// GPT-4.1-mini model identifier.
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
/// GPT-4.1-nano model identifier.
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

/// GPT-4o model identifier.
pub const MODEL_GPT4O: &str = "gpt-4o";
/// GPT-4o-mini model identifier.
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

/// Default base URL for Moonshot KIMI's OpenAI-compatible API.
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
/// Default base URL for z.ai's OpenAI-compatible API.
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
/// Default base URL for `MiniMax`'s OpenAI-compatible API.
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
/// KIMI K2.5 model identifier.
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
/// KIMI K2 Thinking model identifier.
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
/// z.ai GLM-5 model identifier.
pub const MODEL_ZAI_GLM5: &str = "glm-5";
/// `MiniMax` M2.5 model identifier.
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
125
126/// `OpenAI` LLM provider using the Chat Completions API.
127///
128/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
129/// via the `with_base_url` constructor.
130#[derive(Clone)]
131pub struct OpenAIProvider {
132    client: reqwest::Client,
133    api_key: String,
134    model: String,
135    base_url: String,
136    thinking: Option<ThinkingConfig>,
137    /// Extra headers applied to every request (e.g. for gateway authentication).
138    extra_headers: Vec<(String, String)>,
139}
140
141impl OpenAIProvider {
142    /// The conventional environment variable holding the `OpenAI` API key.
143    pub const API_KEY_ENV: &'static str = "OPENAI_API_KEY";
144
145    /// Create a new `OpenAI` provider with the specified API key and model.
146    #[must_use]
147    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
148        Self {
149            client: build_http_client(),
150            api_key: api_key.into(),
151            model: model.into(),
152            base_url: DEFAULT_BASE_URL.to_owned(),
153            thinking: None,
154            extra_headers: Vec::new(),
155        }
156    }
157
158    /// Create a provider using GPT-5, reading the API key from the
159    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
160    ///
161    /// # Panics
162    ///
163    /// Panics if `OPENAI_API_KEY` is not set. Prefer
164    /// [`try_from_env`](Self::try_from_env) outside of examples/tests.
165    #[must_use]
166    pub fn from_env() -> Self {
167        Self::try_from_env().unwrap_or_else(|e| panic!("{e}"))
168    }
169
170    /// Create a provider using GPT-5, reading the API key from the
171    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if `OPENAI_API_KEY` is unset or not valid UTF-8.
176    pub fn try_from_env() -> Result<Self> {
177        let api_key = std::env::var(Self::API_KEY_ENV).map_err(|_| {
178            anyhow::anyhow!("environment variable `{}` is not set", Self::API_KEY_ENV)
179        })?;
180        Ok(Self::gpt5(api_key))
181    }
182
183    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
184    #[must_use]
185    pub fn with_base_url(
186        api_key: impl Into<String>,
187        model: impl Into<String>,
188        base_url: impl Into<String>,
189    ) -> Self {
190        Self {
191            client: build_http_client(),
192            api_key: api_key.into(),
193            model: model.into(),
194            base_url: base_url.into(),
195            thinking: None,
196            extra_headers: Vec::new(),
197        }
198    }
199
200    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
201    #[must_use]
202    pub fn kimi(api_key: String, model: String) -> Self {
203        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
204    }
205
206    /// Create a provider using KIMI K2.5 (default KIMI model).
207    #[must_use]
208    pub fn kimi_k2_5(api_key: String) -> Self {
209        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
210    }
211
212    /// Create a provider using KIMI K2 Thinking.
213    #[must_use]
214    pub fn kimi_k2_thinking(api_key: String) -> Self {
215        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
216    }
217
218    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
219    #[must_use]
220    pub fn zai(api_key: String, model: String) -> Self {
221        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
222    }
223
224    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
225    #[must_use]
226    pub fn zai_glm5(api_key: String) -> Self {
227        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
228    }
229
230    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
231    #[must_use]
232    pub fn minimax(api_key: String, model: String) -> Self {
233        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
234    }
235
236    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
237    #[must_use]
238    pub fn minimax_m2_5(api_key: String) -> Self {
239        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
240    }
241
242    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
243    #[must_use]
244    pub fn gpt52_instant(api_key: String) -> Self {
245        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
246    }
247
248    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
249    #[must_use]
250    pub fn gpt54(api_key: String) -> Self {
251        Self::new(api_key, MODEL_GPT54.to_owned())
252    }
253
254    /// Create a provider using GPT-5.3 Codex (latest codex model).
255    #[must_use]
256    pub fn gpt53_codex(api_key: String) -> Self {
257        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
258    }
259
260    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
261    #[must_use]
262    pub fn gpt52_thinking(api_key: String) -> Self {
263        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
264    }
265
266    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
267    #[must_use]
268    pub fn gpt52_pro(api_key: String) -> Self {
269        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
270    }
271
272    /// Create a provider using the latest Codex model.
273    #[must_use]
274    pub fn codex(api_key: String) -> Self {
275        Self::gpt53_codex(api_key)
276    }
277
278    /// Create a provider using GPT-5 (400k context, coding and reasoning).
279    #[must_use]
280    pub fn gpt5(api_key: String) -> Self {
281        Self::new(api_key, MODEL_GPT5.to_owned())
282    }
283
284    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
285    #[must_use]
286    pub fn gpt5_mini(api_key: String) -> Self {
287        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
288    }
289
290    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
291    #[must_use]
292    pub fn gpt5_nano(api_key: String) -> Self {
293        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
294    }
295
296    /// Create a provider using o3 (most intelligent reasoning model).
297    #[must_use]
298    pub fn o3(api_key: String) -> Self {
299        Self::new(api_key, MODEL_O3.to_owned())
300    }
301
302    /// Create a provider using o3-mini (smaller o3 variant).
303    #[must_use]
304    pub fn o3_mini(api_key: String) -> Self {
305        Self::new(api_key, MODEL_O3_MINI.to_owned())
306    }
307
308    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
309    #[must_use]
310    pub fn o4_mini(api_key: String) -> Self {
311        Self::new(api_key, MODEL_O4_MINI.to_owned())
312    }
313
314    /// Create a provider using o1 (reasoning model).
315    #[must_use]
316    pub fn o1(api_key: String) -> Self {
317        Self::new(api_key, MODEL_O1.to_owned())
318    }
319
320    /// Create a provider using o1-mini (fast reasoning model).
321    #[must_use]
322    pub fn o1_mini(api_key: String) -> Self {
323        Self::new(api_key, MODEL_O1_MINI.to_owned())
324    }
325
326    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
327    #[must_use]
328    pub fn gpt41(api_key: String) -> Self {
329        Self::new(api_key, MODEL_GPT41.to_owned())
330    }
331
332    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
333    #[must_use]
334    pub fn gpt41_mini(api_key: String) -> Self {
335        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
336    }
337
338    /// Create a provider using GPT-4o.
339    #[must_use]
340    pub fn gpt4o(api_key: String) -> Self {
341        Self::new(api_key, MODEL_GPT4O.to_owned())
342    }
343
344    /// Create a provider using GPT-4o-mini (fast and cost-effective).
345    #[must_use]
346    pub fn gpt4o_mini(api_key: String) -> Self {
347        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
348    }
349
350    /// Set the provider-owned thinking configuration for this model.
351    #[must_use]
352    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
353        self.thinking = Some(thinking);
354        self
355    }
356
357    /// Add extra HTTP headers applied to every request.
358    #[must_use]
359    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
360        self.extra_headers = headers;
361        self
362    }
363
364    /// Apply auth + extra headers. Skips `Authorization` when `api_key` is
365    /// empty (BYOK gateway mode — auth handled via `extra_headers`).
366    fn apply_headers(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
367        let builder = if self.api_key.is_empty() {
368            builder
369        } else {
370            builder.header("Authorization", format!("Bearer {}", self.api_key))
371        };
372        self.extra_headers
373            .iter()
374            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
375    }
376
377    /// Build the `OpenAIResponsesProvider` used for the transparent Responses-API
378    /// reroute, forwarding this provider's pooled client, thinking config, and
379    /// extra headers so the rerouted request reuses connections and authenticates
380    /// identically (critical for BYOK / gateway setups with an empty `api_key`).
381    fn responses_reroute(&self) -> OpenAIResponsesProvider {
382        let mut provider = OpenAIResponsesProvider::with_base_url(
383            self.api_key.clone(),
384            self.model.clone(),
385            self.base_url.clone(),
386        )
387        .with_client(self.client.clone())
388        .with_extra_headers(self.extra_headers.clone());
389        if let Some(thinking) = self.thinking.clone() {
390            provider = provider.with_thinking(thinking);
391        }
392        provider
393    }
394}
395
396#[async_trait]
397impl LlmProvider for OpenAIProvider {
398    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
399        // Route official OpenAI agentic flows to the Responses API, preserving
400        // the pooled client and extra_headers (BYOK / gateway auth).
401        if should_use_responses_api(&self.base_url, &self.model, &request) {
402            return self.responses_reroute().chat(request).await;
403        }
404
405        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
406            Ok(thinking) => thinking,
407            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
408        };
409        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
410            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
411        }
412        let reasoning = build_api_reasoning(thinking_config.as_ref());
413        let messages = build_api_messages(&request);
414        let tools: Option<Vec<ApiTool>> = request
415            .tools
416            .map(|ts| ts.into_iter().map(convert_tool).collect());
417        let tool_choice = request
418            .tool_choice
419            .as_ref()
420            .map(ApiToolChoice::from_tool_choice);
421        let response_format = request
422            .response_format
423            .as_ref()
424            .map(ApiResponseFormat::from_response_format);
425
426        let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
427        let api_request = ApiChatRequest {
428            model: &self.model,
429            messages: &messages,
430            max_completion_tokens: Some(request.max_tokens),
431            max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
432            tools: tools.as_deref(),
433            tool_choice,
434            reasoning,
435            response_format,
436        };
437
438        log::debug!(
439            "OpenAI LLM request model={} max_tokens={}",
440            self.model,
441            request.max_tokens
442        );
443
444        let builder = self
445            .client
446            .post(format!("{}/chat/completions", self.base_url))
447            .header("Content-Type", "application/json");
448        let response = self
449            .apply_headers(builder)
450            .json(&api_request)
451            .send()
452            .await
453            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
454
455        let status = response.status();
456        // Read `Retry-After` off the 429 response before the body is consumed.
457        let retry_after = if status == StatusCode::TOO_MANY_REQUESTS {
458            crate::http::retry_after_from_headers(response.headers())
459        } else {
460            None
461        };
462        let bytes = response
463            .bytes()
464            .await
465            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
466
467        log::debug!(
468            "OpenAI LLM response status={} body_len={}",
469            status,
470            bytes.len()
471        );
472
473        decode_chat_response(status, &bytes, retry_after)
474    }
475
476    #[allow(clippy::too_many_lines)]
477    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
478        // Route official OpenAI agentic flows to the Responses API, preserving
479        // the pooled client and extra_headers (BYOK / gateway auth).
480        if should_use_responses_api(&self.base_url, &self.model, &request) {
481            let responses_provider = self.responses_reroute();
482            return Box::pin(async_stream::stream! {
483                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
484                while let Some(item) = futures::StreamExt::next(&mut stream).await {
485                    yield item;
486                }
487            });
488        }
489
490        Box::pin(async_stream::stream! {
491            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
492                Ok(thinking) => thinking,
493                Err(error) => {
494                    yield Ok(StreamDelta::Error {
495                        message: error.to_string(),
496                        kind: StreamErrorKind::InvalidRequest,
497                    });
498                    return;
499                }
500            };
501            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
502                yield Ok(StreamDelta::Error {
503                    message: error.to_string(),
504                    kind: StreamErrorKind::InvalidRequest,
505                });
506                return;
507            }
508            let reasoning = build_api_reasoning(thinking_config.as_ref());
509            let messages = build_api_messages(&request);
510            let tools: Option<Vec<ApiTool>> = request
511                .tools
512                .map(|ts| ts.into_iter().map(convert_tool).collect());
513            let tool_choice = request
514                .tool_choice
515                .as_ref()
516                .map(ApiToolChoice::from_tool_choice);
517            let response_format = request
518                .response_format
519                .as_ref()
520                .map(ApiResponseFormat::from_response_format);
521
522            let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
523            let include_stream_usage = use_stream_usage_options(&self.base_url);
524            let include_openrouter_usage = use_openrouter_usage_options(&self.base_url);
525            let api_request = ApiChatRequestStreaming {
526                model: &self.model,
527                messages: &messages,
528                max_completion_tokens: Some(request.max_tokens),
529                max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
530                tools: tools.as_deref(),
531                tool_choice,
532                reasoning,
533                response_format,
534                stream_options: include_stream_usage.then_some(ApiStreamOptions {
535                    include_usage: true,
536                }),
537                usage: include_openrouter_usage
538                    .then_some(ApiOpenRouterUsageOptions { include: true }),
539                stream: true,
540            };
541
542            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
543
544            let stream_builder = self.client
545                .post(format!("{}/chat/completions", self.base_url))
546                .header("Content-Type", "application/json");
547            let Ok(response) = self
548                .apply_headers(stream_builder)
549                .json(&api_request)
550                .send()
551                .await
552            else {
553                yield Err(anyhow::anyhow!("request failed"));
554                return;
555            };
556
557            let status = response.status();
558
559            if !status.is_success() {
560                let body = response.text().await.unwrap_or_default();
561                let (kind, level) = if status == StatusCode::TOO_MANY_REQUESTS {
562                    (StreamErrorKind::RateLimited, "rate_limit")
563                } else if status.is_server_error() {
564                    (StreamErrorKind::ServerError, "server_error")
565                } else {
566                    (StreamErrorKind::InvalidRequest, "client_error")
567                };
568                log::warn!("OpenAI error status={status} body={body} kind={level}");
569                yield Ok(StreamDelta::Error { message: body, kind });
570                return;
571            }
572
573            // Track tool call state across deltas
574            let mut tool_calls: HashMap<usize, ToolCallAccumulator> = HashMap::new();
575            let mut usage: Option<Usage> = None;
576            // The stop reason from `finish_reason`. With stream_options.include_usage
577            // (official OpenAI) the usage arrives in a SEPARATE trailing chunk
578            // (choices: []) AFTER finish_reason and before [DONE], so we record the
579            // stop reason and keep consuming until [DONE] / stream end rather than
580            // returning early and dropping that usage chunk.
581            let mut stop_reason: Option<StopReason> = None;
582            let mut sse = SseLineBuffer::new();
583            let mut stream = response.bytes_stream();
584
585            while let Some(chunk_result) = stream.next().await {
586                let chunk = match chunk_result {
587                    Ok(chunk) => chunk,
588                    Err(error) => {
589                        yield Err(anyhow::anyhow!("stream error: {error}"));
590                        return;
591                    }
592                };
593                sse.extend(&chunk);
594
595                while let Some(line) = sse.next_line() {
596                    let line = line.trim();
597                    if line.is_empty() { continue; }
598                    let Some(data) = line.strip_prefix("data: ") else { continue; };
599
600                    let outcome = step_completion_stream(
601                        data,
602                        &mut tool_calls,
603                        &mut usage,
604                        &mut stop_reason,
605                    );
606                    for delta in outcome.immediate { yield Ok(delta); }
607                    if let Some(terminal) = outcome.terminal {
608                        for delta in terminal { yield Ok(delta); }
609                        return;
610                    }
611                }
612            }
613
614            // Stream ended without [DONE] - emit what we have. Infer the stop
615            // reason from accumulated tool calls (same heuristic as the [DONE]
616            // arm) so a stream that dies mid-tool-call doesn't report EndTurn.
617            let sr = stop_reason.unwrap_or_else(|| fallback_stream_stop_reason(&tool_calls));
618            for delta in build_stream_end_deltas(&tool_calls, usage.take(), sr) {
619                yield Ok(delta);
620            }
621        })
622    }
623
624    async fn list_models(&self) -> Result<Vec<crate::provider::ModelInfo>> {
625        let builder = self
626            .client
627            .get(format!("{}/models", self.base_url))
628            .header("Content-Type", "application/json");
629        let builder = self.apply_headers(builder);
630        let body = crate::impls::model_listing::fetch_model_list_body(builder, "OpenAI").await?;
631        parse_models_list(&body)
632    }
633
634    fn model(&self) -> &str {
635        &self.model
636    }
637
638    fn provider(&self) -> &'static str {
639        "openai"
640    }
641
642    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
643        self.thinking.as_ref()
644    }
645}
646
647/// Parse the `OpenAI` `GET /models` response body into [`ModelInfo`] rows.
648///
649/// The Chat Completions list endpoint returns `{ "data": [{ "id", ... }] }`
650/// and reports neither a display name nor token limits, so those fields stay
651/// `None`. This shape is shared by the OpenAI-compatible vendor APIs.
652fn parse_models_list(body: &str) -> Result<Vec<crate::provider::ModelInfo>> {
653    #[derive(Deserialize)]
654    struct ListResponse {
655        #[serde(default)]
656        data: Vec<ModelRow>,
657    }
658    #[derive(Deserialize)]
659    struct ModelRow {
660        id: String,
661    }
662    let parsed: ListResponse = serde_json::from_str(body)
663        .map_err(|e| anyhow::anyhow!("failed to parse OpenAI models list: {e}"))?;
664    Ok(parsed
665        .data
666        .into_iter()
667        .map(|row| crate::provider::ModelInfo {
668            id: row.id,
669            display_name: None,
670            context_window: None,
671            max_output_tokens: None,
672        })
673        .collect())
674}
675
676/// Apply a tool call update to the accumulator.
677fn apply_tool_call_update(
678    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
679    index: usize,
680    id: Option<String>,
681    name: Option<String>,
682    arguments: Option<String>,
683) {
684    let entry = tool_calls
685        .entry(index)
686        .or_insert_with(|| ToolCallAccumulator {
687            id: String::new(),
688            name: String::new(),
689            arguments: String::new(),
690        });
691    if let Some(id) = id {
692        entry.id = id;
693    }
694    if let Some(name) = name {
695        entry.name = name;
696    }
697    if let Some(args) = arguments {
698        entry.arguments.push_str(&args);
699    }
700}
701
702/// Immediate + terminal deltas produced by feeding one SSE `data:` line to the
703/// Chat Completions streaming state.
704struct SseLineOutcome {
705    /// Deltas to yield immediately (text / thinking).
706    immediate: Vec<StreamDelta>,
707    /// When `Some`, the stream finished ([DONE] received): yield these terminal
708    /// deltas (tool calls + usage + Done) and stop.
709    terminal: Option<Vec<StreamDelta>>,
710}
711
712/// Feed one SSE `data:` payload to the streaming state, accumulating tool calls,
713/// usage, and the stop reason.
714///
715/// Text/thinking deltas are returned for immediate emission. A `finish_reason`
716/// only records the stop reason (it does NOT finalize) so a trailing usage-only
717/// chunk that official `OpenAI` sends after `finish_reason` is still folded in;
718/// finalization happens on the `[DONE]` sentinel.
719fn step_completion_stream(
720    data: &str,
721    tool_calls: &mut HashMap<usize, ToolCallAccumulator>,
722    usage: &mut Option<Usage>,
723    stop_reason: &mut Option<StopReason>,
724) -> SseLineOutcome {
725    let mut immediate = Vec::new();
726    for result in process_sse_data(data) {
727        match result {
728            SseProcessResult::TextDelta(c) => {
729                immediate.push(StreamDelta::TextDelta {
730                    delta: c,
731                    block_index: 0,
732                });
733            }
734            SseProcessResult::ThinkingDelta(c) => {
735                immediate.push(StreamDelta::ThinkingDelta {
736                    delta: c,
737                    block_index: 0,
738                });
739            }
740            SseProcessResult::ToolCallUpdate {
741                index,
742                id,
743                name,
744                arguments,
745            } => apply_tool_call_update(tool_calls, index, id, name, arguments),
746            SseProcessResult::Usage(u) => *usage = Some(u),
747            SseProcessResult::Done(sr) => *stop_reason = Some(sr),
748            SseProcessResult::Sentinel => {
749                let sr = stop_reason.unwrap_or_else(|| fallback_stream_stop_reason(tool_calls));
750                let terminal = build_stream_end_deltas(tool_calls, usage.take(), sr);
751                return SseLineOutcome {
752                    immediate,
753                    terminal: Some(terminal),
754                };
755            }
756        }
757    }
758    SseLineOutcome {
759        immediate,
760        terminal: None,
761    }
762}
763
764/// Helper to emit tool call deltas and done event.
765fn build_stream_end_deltas(
766    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
767    usage: Option<Usage>,
768    stop_reason: StopReason,
769) -> Vec<StreamDelta> {
770    let mut deltas = Vec::new();
771
772    // Emit tool calls. `idx` comes from the wire `tool_calls[].index`; use
773    // saturating_add so a hostile `usize::MAX` index cannot overflow-panic in
774    // debug builds. StreamAccumulator sorts by index so order stays stable.
775    for (idx, tool) in tool_calls {
776        let block_index = idx.saturating_add(1);
777        deltas.push(StreamDelta::ToolUseStart {
778            id: tool.id.clone(),
779            name: tool.name.clone(),
780            block_index,
781            thought_signature: None,
782        });
783        deltas.push(StreamDelta::ToolInputDelta {
784            id: tool.id.clone(),
785            delta: tool.arguments.clone(),
786            block_index,
787        });
788    }
789
790    // Emit usage
791    if let Some(u) = usage {
792        deltas.push(StreamDelta::Usage(u));
793    }
794
795    // Emit done
796    deltas.push(StreamDelta::Done {
797        stop_reason: Some(stop_reason),
798    });
799
800    deltas
801}
802
/// One unit of work extracted from a single SSE `data:` payload.
///
/// `process_sse_data` returns a list of these per payload; the caller applies
/// them to its streaming state.
enum SseProcessResult {
    /// Emit a text delta.
    TextDelta(String),
    /// Emit a thinking/reasoning delta (reasoning-model fallback when the model
    /// streams its output via `reasoning_content`/`reasoning` and `content` is
    /// empty, mirroring the non-streaming `build_content_blocks` fallback).
    ThinkingDelta(String),
    /// Update tool call accumulator (index, optional id, optional name, optional args).
    ToolCallUpdate {
        /// Wire `tool_calls[].index` of the call this fragment belongs to.
        index: usize,
        /// Tool call id, when this fragment carries one.
        id: Option<String>,
        /// Function name, when this fragment carries one.
        name: Option<String>,
        /// Fragment of the JSON-encoded arguments string, when present.
        arguments: Option<String>,
    },
    /// Usage information.
    Usage(Usage),
    /// Stream is done with a stop reason.
    Done(StopReason),
    /// Stream sentinel [DONE] was received.
    Sentinel,
}
825
826/// Process an SSE data line and return results to apply.
827fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
828    if data == "[DONE]" {
829        return vec![SseProcessResult::Sentinel];
830    }
831
832    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
833        return vec![];
834    };
835
836    let mut results = Vec::new();
837
838    // Extract usage if present
839    if let Some(u) = chunk.usage {
840        results.push(SseProcessResult::Usage(Usage {
841            input_tokens: u.prompt_tokens,
842            output_tokens: u.completion_tokens,
843            cached_input_tokens: u
844                .prompt_tokens_details
845                .as_ref()
846                .map_or(0, |details| details.cached_tokens),
847            cache_creation_input_tokens: 0,
848        }));
849    }
850
851    // Process choices
852    if let Some(choice) = chunk.choices.into_iter().next() {
853        // Handle text content delta. When `content` is empty/absent but the
854        // model streamed reasoning tokens (DeepSeek-style answer-in-
855        // `reasoning_content`, or `OpenRouter`-normalized `reasoning`), surface
856        // the reasoning as a thinking delta so the usable output is not silently
857        // dropped under streaming. This mirrors the non-streaming
858        // `build_content_blocks` fallback: text content takes precedence and the
859        // reasoning fallback only fires when `content` is empty.
860        if let Some(content) = choice.delta.content
861            && !content.is_empty()
862        {
863            results.push(SseProcessResult::TextDelta(content));
864        } else if let Some(reasoning) = choice
865            .delta
866            .reasoning_content
867            .as_deref()
868            .or(choice.delta.reasoning.as_deref())
869            .filter(|r| !r.is_empty())
870        {
871            results.push(SseProcessResult::ThinkingDelta(reasoning.to_owned()));
872        }
873
874        // Handle tool call deltas
875        if let Some(tc_deltas) = choice.delta.tool_calls {
876            for tc in tc_deltas {
877                results.push(SseProcessResult::ToolCallUpdate {
878                    index: tc.index,
879                    id: tc.id,
880                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
881                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
882                });
883            }
884        }
885
886        // Check for finish reason
887        if let Some(finish_reason) = choice.finish_reason {
888            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
889        }
890    }
891
892    results
893}
894
/// Returns `true` when `base_url` contains one of the host fragments listed
/// below (`moonshot.ai`, `api.z.ai`, `minimax.io`).
///
/// NOTE(review): presumably selects the legacy `max_tokens` request field over
/// `max_completion_tokens` for these endpoints; the call site is outside this
/// chunk, so confirm there.
fn use_max_tokens_alias(base_url: &str) -> bool {
    const ALIAS_HOSTS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];

    ALIAS_HOSTS.iter().any(|host| base_url.contains(*host))
}
900
/// Whether to request `stream_options.include_usage` for a streaming call.
///
/// Always `true`, regardless of the base URL: every `OpenAI`-compatible
/// endpoint accepts the option, and asking for it everywhere means
/// `OpenRouter` / `Baseten` / local streams also carry a usage frame, so
/// `total_usage` and downstream cost ledgers are populated (issue #302)
/// rather than only first-party `api.openai.com` turns.
const fn use_stream_usage_options(_base_url: &str) -> bool {
    true
}
908
/// Whether the request needs `OpenRouter`'s top-level `usage: { include: true }`
/// flag. That flag is separate from `stream_options.include_usage` and is what
/// makes `OpenRouter` emit a usage frame.
///
/// Detection is a substring check for `openrouter.ai` in `base_url`.
fn use_openrouter_usage_options(base_url: &str) -> bool {
    const OPENROUTER_HOST: &str = "openrouter.ai";

    base_url.contains(OPENROUTER_HOST)
}
914
915/// Infer the stream stop reason when the provider never sent an explicit
916/// `finish_reason` (truncated stream / EOF): a turn with accumulated tool
917/// calls is a `ToolUse`, otherwise a plain `EndTurn`.
918fn fallback_stream_stop_reason(
919    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
920) -> StopReason {
921    if tool_calls.is_empty() {
922        StopReason::EndTurn
923    } else {
924        StopReason::ToolUse
925    }
926}
927
928/// Map an HTTP status + body into a [`ChatOutcome`], parsing the success body
929/// into a [`ChatResponse`].
930fn decode_chat_response(
931    status: StatusCode,
932    bytes: &[u8],
933    retry_after: Option<std::time::Duration>,
934) -> Result<ChatOutcome> {
935    if status == StatusCode::TOO_MANY_REQUESTS {
936        return Ok(ChatOutcome::RateLimited(retry_after));
937    }
938
939    if status.is_server_error() {
940        let body = String::from_utf8_lossy(bytes);
941        log::error!("OpenAI server error status={status} body={body}");
942        return Ok(ChatOutcome::ServerError(body.into_owned()));
943    }
944
945    if status.is_client_error() {
946        let body = String::from_utf8_lossy(bytes);
947        log::warn!("OpenAI client error status={status} body={body}");
948        return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
949    }
950
951    let api_response: ApiChatResponse = serde_json::from_slice(bytes)
952        .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
953
954    let choice = api_response
955        .choices
956        .into_iter()
957        .next()
958        .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
959
960    let content = build_content_blocks(&choice.message);
961    let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
962
963    Ok(ChatOutcome::Success(ChatResponse {
964        id: api_response.id,
965        content,
966        model: api_response.model,
967        stop_reason,
968        usage: Usage {
969            input_tokens: api_response.usage.prompt_tokens,
970            output_tokens: api_response.usage.completion_tokens,
971            cached_input_tokens: api_response
972                .usage
973                .prompt_tokens_details
974                .as_ref()
975                .map_or(0, |details| details.cached_tokens),
976            cache_creation_input_tokens: 0,
977        },
978    }))
979}
980
981fn map_finish_reason(finish_reason: &str) -> StopReason {
982    match finish_reason {
983        "stop" => StopReason::EndTurn,
984        "tool_calls" => StopReason::ToolUse,
985        "length" => StopReason::MaxTokens,
986        "content_filter" | "network_error" => StopReason::StopSequence,
987        "sensitive" => StopReason::Refusal,
988        unknown => {
989            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
990            StopReason::StopSequence
991        }
992    }
993}
994
995fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
996    thinking
997        .and_then(resolve_reasoning_effort)
998        .map(|effort| ApiReasoning { effort })
999}
1000
1001const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
1002    if let Some(effort) = config.effort {
1003        return Some(map_effort(effort));
1004    }
1005
1006    match &config.mode {
1007        ThinkingMode::Adaptive => None,
1008        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
1009    }
1010}
1011
1012const fn map_effort(effort: Effort) -> ReasoningEffort {
1013    match effort {
1014        Effort::Low => ReasoningEffort::Low,
1015        Effort::Medium => ReasoningEffort::Medium,
1016        Effort::High => ReasoningEffort::High,
1017        Effort::Max => ReasoningEffort::XHigh,
1018    }
1019}
1020
1021const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
1022    if budget_tokens <= 4_096 {
1023        ReasoningEffort::Low
1024    } else if budget_tokens <= 16_384 {
1025        ReasoningEffort::Medium
1026    } else if budget_tokens <= 32_768 {
1027        ReasoningEffort::High
1028    } else {
1029        ReasoningEffort::XHigh
1030    }
1031}
1032
1033const fn api_role(role: agent_sdk_foundation::llm::Role) -> ApiRole {
1034    match role {
1035        agent_sdk_foundation::llm::Role::User => ApiRole::User,
1036        agent_sdk_foundation::llm::Role::Assistant => ApiRole::Assistant,
1037    }
1038}
1039
1040/// Convert a `Content::Blocks` message into the `OpenAI` wire messages it maps
1041/// to, appending them to `messages`.
1042///
1043/// Tool results become standalone `tool` messages; text, tool calls and (on
1044/// assistant tool-call turns) echoed-back reasoning collapse into a single
1045/// message.
1046fn append_block_messages(
1047    messages: &mut Vec<ApiMessage>,
1048    role: agent_sdk_foundation::llm::Role,
1049    blocks: &[ContentBlock],
1050) {
1051    let mut text_parts = Vec::new();
1052    let mut thinking_parts = Vec::new();
1053    let mut tool_calls = Vec::new();
1054
1055    for block in blocks {
1056        match block {
1057            ContentBlock::Text { text } => text_parts.push(text.clone()),
1058            ContentBlock::Thinking { thinking, .. } => {
1059                // DeepSeek-style thinking-mode multi-turn requires the prior
1060                // assistant reasoning_content to be echoed back on a tool-call
1061                // turn or the API 400s. Collected here; only carried into
1062                // reasoning_content below when this turn also has a tool call.
1063                thinking_parts.push(thinking.clone());
1064            }
1065            ContentBlock::RedactedThinking { .. }
1066            | ContentBlock::Image { .. }
1067            | ContentBlock::Document { .. } => {
1068                // These blocks are not sent to the OpenAI API
1069            }
1070            ContentBlock::ToolUse {
1071                id, name, input, ..
1072            } => {
1073                tool_calls.push(ApiToolCall {
1074                    id: id.clone(),
1075                    r#type: "function".to_owned(),
1076                    function: ApiFunctionCall {
1077                        name: name.clone(),
1078                        arguments: serde_json::to_string(input).unwrap_or_else(|_| "{}".to_owned()),
1079                    },
1080                });
1081            }
1082            ContentBlock::ToolResult {
1083                tool_use_id,
1084                content,
1085                ..
1086            } => {
1087                // Tool results are separate messages in OpenAI
1088                messages.push(ApiMessage {
1089                    role: ApiRole::Tool,
1090                    content: Some(content.clone()),
1091                    reasoning_content: None,
1092                    tool_calls: None,
1093                    tool_call_id: Some(tool_use_id.clone()),
1094                });
1095            }
1096            // `ContentBlock` is `#[non_exhaustive]`; a block kind this SDK
1097            // version cannot represent is not sent to OpenAI.
1098            _ => log::warn!("Skipping unrecognized OpenAI content block"),
1099        }
1100    }
1101
1102    let role = api_role(role);
1103
1104    // reasoning_content is only echoed back on an assistant turn that ALSO
1105    // carries a tool call — the one case DeepSeek's thinking-mode protocol
1106    // requires it. Per that protocol legacy `deepseek-reasoner` 400s if
1107    // reasoning_content appears in input at all, and DeepSeek V4 thinking-mode
1108    // only needs it on tool-call turns. So a plain reasoning-only assistant
1109    // turn (no tool call) does NOT carry reasoning_content, and it is never
1110    // attached to user messages.
1111    let reasoning_content =
1112        if role == ApiRole::Assistant && !thinking_parts.is_empty() && !tool_calls.is_empty() {
1113            Some(thinking_parts.join("\n"))
1114        } else {
1115            None
1116        };
1117
1118    // Add the message when it carries text, tool calls, or (for an assistant
1119    // turn) reasoning to echo back. Only emit if it's an assistant message or
1120    // has text content.
1121    let has_payload =
1122        !text_parts.is_empty() || !tool_calls.is_empty() || reasoning_content.is_some();
1123    if has_payload && (role == ApiRole::Assistant || !text_parts.is_empty()) {
1124        messages.push(ApiMessage {
1125            role,
1126            content: if text_parts.is_empty() {
1127                None
1128            } else {
1129                Some(text_parts.join("\n"))
1130            },
1131            reasoning_content,
1132            tool_calls: if tool_calls.is_empty() {
1133                None
1134            } else {
1135                Some(tool_calls)
1136            },
1137            tool_call_id: None,
1138        });
1139    }
1140}
1141
1142fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
1143    let mut messages = Vec::new();
1144
1145    // Add system message first (OpenAI uses a separate message for system prompt)
1146    if !request.system.is_empty() {
1147        messages.push(ApiMessage {
1148            role: ApiRole::System,
1149            content: Some(request.system.clone()),
1150            reasoning_content: None,
1151            tool_calls: None,
1152            tool_call_id: None,
1153        });
1154    }
1155
1156    // Convert SDK messages to OpenAI format
1157    for msg in &request.messages {
1158        match &msg.content {
1159            Content::Text(text) => {
1160                messages.push(ApiMessage {
1161                    role: api_role(msg.role),
1162                    content: Some(text.clone()),
1163                    reasoning_content: None,
1164                    tool_calls: None,
1165                    tool_call_id: None,
1166                });
1167            }
1168            Content::Blocks(blocks) => append_block_messages(&mut messages, msg.role, blocks),
1169        }
1170    }
1171
1172    messages
1173}
1174
1175fn convert_tool(t: agent_sdk_foundation::llm::Tool) -> ApiTool {
1176    ApiTool {
1177        r#type: "function".to_owned(),
1178        function: ApiFunction {
1179            name: t.name,
1180            description: t.description,
1181            parameters: t.input_schema,
1182        },
1183    }
1184}
1185
1186/// Non-empty reasoning text from an `OpenAI`-compatible response message, if any.
1187///
1188/// Prefers `DeepSeek`-style `reasoning_content`, falling back to the `reasoning`
1189/// field used by some `OpenRouter` upstreams.
1190fn reasoning_text(message: &ApiResponseMessage) -> Option<&str> {
1191    message
1192        .reasoning_content
1193        .as_deref()
1194        .or(message.reasoning.as_deref())
1195        .filter(|r| !r.is_empty())
1196}
1197
1198fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
1199    let mut blocks = Vec::new();
1200
1201    // Add text content if present
1202    if let Some(content) = &message.content
1203        && !content.is_empty()
1204    {
1205        blocks.push(ContentBlock::Text {
1206            text: content.clone(),
1207        });
1208    } else if let Some(reasoning) = reasoning_text(message) {
1209        // Reasoning-model fallback: when `content` is empty/absent but the model
1210        // produced reasoning tokens (DeepSeek-style answer-in-`reasoning_content`,
1211        // or any reasoning model truncated under a tight `max_tokens` before it
1212        // emitted visible content), surface the reasoning as a Thinking block so
1213        // the usable output is not silently dropped. This is a fallback only —
1214        // when `content` is present the reasoning is left untouched.
1215        blocks.push(ContentBlock::Thinking {
1216            thinking: reasoning.to_owned(),
1217            signature: None,
1218        });
1219    }
1220
1221    // Add tool calls if present
1222    if let Some(tool_calls) = &message.tool_calls {
1223        for tc in tool_calls {
1224            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
1225                .unwrap_or_else(|_| serde_json::json!({}));
1226            blocks.push(ContentBlock::ToolUse {
1227                id: tc.id.clone(),
1228                name: tc.function.name.clone(),
1229                input,
1230                thought_signature: None,
1231            });
1232        }
1233    }
1234
1235    blocks
1236}
1237
1238// ============================================================================
1239// API Request Types
1240// ============================================================================
1241
/// Serializable chat request body without any streaming fields (compare
/// `ApiChatRequestStreaming`).
///
/// Every `Option` field is left out of the serialized output when `None`.
#[derive(Serialize)]
struct ApiChatRequest<'a> {
    /// Model identifier.
    model: &'a str,
    /// Conversation in wire format.
    messages: &'a [ApiMessage],
    /// Output token cap under the `max_completion_tokens` name.
    // NOTE(review): presumably only one of `max_completion_tokens` /
    // `max_tokens` is populated per request; the choice is made by the caller,
    // which is outside this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Output token cap under the `max_tokens` name.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    /// Tool definitions offered to the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Tool selection directive.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<ApiToolChoice>,
    /// Reasoning effort object.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Structured-output format.
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<ApiResponseFormat>,
}
1259
/// Serializable chat request body for streaming calls.
///
/// Carries the same fields as `ApiChatRequest` plus `stream_options`, `usage`
/// and the always-serialized `stream` flag. Every `Option` field is left out
/// of the serialized output when `None`.
#[derive(Serialize)]
struct ApiChatRequestStreaming<'a> {
    /// Model identifier.
    model: &'a str,
    /// Conversation in wire format.
    messages: &'a [ApiMessage],
    /// Output token cap under the `max_completion_tokens` name.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    /// Output token cap under the `max_tokens` name.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    /// Tool definitions offered to the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<&'a [ApiTool]>,
    /// Tool selection directive.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<ApiToolChoice>,
    /// Reasoning effort object.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ApiReasoning>,
    /// Structured-output format.
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<ApiResponseFormat>,
    /// `stream_options` object (holds `include_usage`).
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<ApiStreamOptions>,
    /// `OpenRouter`'s top-level `usage` accounting object.
    #[serde(skip_serializing_if = "Option::is_none")]
    usage: Option<ApiOpenRouterUsageOptions>,
    /// Streaming flag; has no skip attribute, so it is always serialized.
    stream: bool,
}
1282
/// `OpenAI` `tool_choice` wire format.
///
/// - `"auto"` — model decides.
/// - `{"type": "function", "function": {"name": "<name>"}}` — force a specific function.
///
/// The enum is `untagged`, so each variant serializes as its bare payload with
/// no variant name in the output.
#[derive(Serialize)]
#[serde(untagged)]
enum ApiToolChoice {
    /// Bare string form, e.g. `"auto"`.
    String(String),
    /// Object form that names one function.
    Named {
        /// Serialized under the key `type`.
        #[serde(rename = "type")]
        choice_type: String,
        /// The function being named.
        function: ApiToolChoiceFunction,
    },
}
1297
/// The `function` object inside a named `tool_choice`: `{"name": "<name>"}`.
#[derive(Serialize)]
struct ApiToolChoiceFunction {
    /// Name of the function.
    name: String,
}
1302
1303impl ApiToolChoice {
1304    fn from_tool_choice(tc: &agent_sdk_foundation::llm::ToolChoice) -> Self {
1305        match tc {
1306            agent_sdk_foundation::llm::ToolChoice::Auto => Self::String("auto".to_owned()),
1307            agent_sdk_foundation::llm::ToolChoice::Tool(name) => Self::Named {
1308                choice_type: "function".to_owned(),
1309                function: ApiToolChoiceFunction { name: name.clone() },
1310            },
1311        }
1312    }
1313}
1314
/// `OpenAI` `response_format` wire format for structured outputs.
///
/// Emits `{"type": "json_schema", "json_schema": {"name", "schema", "strict"}}`.
#[derive(Serialize)]
struct ApiResponseFormat {
    /// Serialized under the key `type`; `from_response_format` sets it to
    /// `"json_schema"`.
    #[serde(rename = "type")]
    format_type: &'static str,
    /// The named schema the output must conform to.
    json_schema: ApiJsonSchema,
}
1324
/// The `json_schema` object inside `response_format`.
#[derive(Serialize)]
struct ApiJsonSchema {
    /// Schema name.
    name: String,
    /// The JSON schema document itself.
    schema: serde_json::Value,
    /// Strictness flag, copied from the SDK response format.
    strict: bool,
}
1331
1332impl ApiResponseFormat {
1333    fn from_response_format(rf: &agent_sdk_foundation::llm::ResponseFormat) -> Self {
1334        Self {
1335            format_type: "json_schema",
1336            json_schema: ApiJsonSchema {
1337                name: rf.name.clone(),
1338                schema: rf.schema.clone(),
1339                strict: rf.strict,
1340            },
1341        }
1342    }
1343}
1344
/// The `stream_options` request object: `{"include_usage": <bool>}`.
#[derive(Clone, Copy, Serialize)]
struct ApiStreamOptions {
    /// Value of the `include_usage` flag.
    include_usage: bool,
}
1349
/// `OpenRouter`'s top-level usage-accounting flag (`usage: { include: true }`),
/// distinct from `stream_options.include_usage`.
#[derive(Clone, Copy, Serialize)]
struct ApiOpenRouterUsageOptions {
    /// Value of the `include` flag.
    include: bool,
}
1356
/// Wire reasoning effort level.
///
/// Serialized in lowercase: `"low"`, `"medium"`, `"high"`, `"xhigh"`.
#[derive(Clone, Copy, Serialize)]
#[serde(rename_all = "lowercase")]
enum ReasoningEffort {
    /// `"low"`
    Low,
    /// `"medium"`
    Medium,
    /// `"high"`
    High,
    /// `"xhigh"`; the wire name is pinned explicitly.
    #[serde(rename = "xhigh")]
    XHigh,
}
1366
/// The `reasoning` request object: `{"effort": "<level>"}`.
#[derive(Serialize)]
struct ApiReasoning {
    /// Requested effort level.
    effort: ReasoningEffort,
}
1371
/// One message in the request's `messages` array.
///
/// Every `Option` field is left out of the serialized output when `None`.
#[derive(Serialize)]
struct ApiMessage {
    /// Who the message is from.
    role: ApiRole,
    /// Text content; `None` for an assistant turn that only carries tool calls.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// `DeepSeek`-style thinking-mode multi-turn requires the prior assistant
    /// `reasoning_content` to be echoed back on a tool-call turn or the API
    /// rejects it (HTTP 400). Carried back only for assistant turns that had a
    /// Thinking block AND a tool call; omitted entirely otherwise (including
    /// reasoning-only turns, since legacy `deepseek-reasoner` 400s if
    /// `reasoning_content` appears in input) so the normal path is unchanged.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_content: Option<String>,
    /// Tool calls issued by the assistant on this turn.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ApiToolCall>>,
    /// For `tool` messages: id of the tool call this result answers.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
}
1390
/// Wire message role, serialized in lowercase (`"system"`, `"user"`,
/// `"assistant"`, `"tool"`).
#[derive(Debug, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ApiRole {
    /// The system prompt message.
    System,
    /// A user turn.
    User,
    /// An assistant turn.
    Assistant,
    /// A tool result message.
    Tool,
}
1399
/// A tool call echoed back on an assistant message.
#[derive(Serialize)]
struct ApiToolCall {
    /// Tool call id.
    id: String,
    /// Call type; `append_block_messages` sets it to `"function"`.
    r#type: String,
    /// The function name and its arguments.
    function: ApiFunctionCall,
}
1406
/// The `function` object of an outgoing tool call.
#[derive(Serialize)]
struct ApiFunctionCall {
    /// Function name.
    name: String,
    /// Arguments as a JSON-encoded string, not a nested object.
    arguments: String,
}
1412
/// A tool definition in the request's `tools` array.
#[derive(Serialize)]
struct ApiTool {
    /// Tool type; `convert_tool` sets it to `"function"`.
    r#type: String,
    /// The function definition.
    function: ApiFunction,
}
1418
/// The `function` object of a tool definition.
#[derive(Serialize)]
struct ApiFunction {
    /// Function name.
    name: String,
    /// Description of the function.
    description: String,
    /// Parameter schema; `convert_tool` fills it from the SDK tool's input schema.
    parameters: serde_json::Value,
}
1425
1426// ============================================================================
1427// API Response Types
1428// ============================================================================
1429
/// Top-level body of a non-streaming chat completion response.
#[derive(Deserialize)]
struct ApiChatResponse {
    /// Response id.
    id: String,
    /// Returned choices; `decode_chat_response` uses only the first.
    choices: Vec<ApiChoice>,
    /// Model name reported by the server.
    model: String,
    /// Token accounting for the request.
    usage: ApiUsage,
}
1437
/// One entry of the response's `choices` array.
#[derive(Deserialize)]
struct ApiChoice {
    /// The assistant message for this choice.
    message: ApiResponseMessage,
    /// Raw finish reason; translated by `map_finish_reason` when present.
    finish_reason: Option<String>,
}
1443
/// The assistant message inside a non-streaming response choice.
#[derive(Deserialize)]
struct ApiResponseMessage {
    /// Visible text output, if any.
    content: Option<String>,
    /// Tool calls requested by the model, if any.
    tool_calls: Option<Vec<ApiResponseToolCall>>,
    /// `DeepSeek`-style chain-of-thought, returned at the same level as
    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
    #[serde(default)]
    reasoning_content: Option<String>,
    /// `OpenRouter` normalizes reasoning under a `reasoning` field for some
    /// upstreams; treated as an equivalent fallback to `reasoning_content`.
    #[serde(default)]
    reasoning: Option<String>,
}
1457
/// A tool call returned in a non-streaming response message.
#[derive(Deserialize)]
struct ApiResponseToolCall {
    /// Tool call id.
    id: String,
    /// The function name and its arguments.
    function: ApiResponseFunctionCall,
}
1463
/// The `function` object of a returned tool call.
#[derive(Deserialize)]
struct ApiResponseFunctionCall {
    /// Function name.
    name: String,
    /// Arguments as a JSON-encoded string; parsed by `build_content_blocks`.
    arguments: String,
}
1469
/// Token accounting of a non-streaming response.
///
/// Counts are read through `deserialize_u32_from_number`, which accepts both
/// integers and integer-valued floats.
#[derive(Deserialize)]
struct ApiUsage {
    /// Input (prompt) token count.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    /// Output (completion) token count.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    /// Prompt token breakdown; `None` when the server omits it.
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1479
/// The `prompt_tokens_details` object of a usage block.
#[derive(Deserialize)]
struct ApiPromptTokensDetails {
    /// Prompt tokens served from cache; defaults to `0` when the field is missing.
    #[serde(default, deserialize_with = "deserialize_u32_from_number")]
    cached_tokens: u32,
}
1485
1486// ============================================================================
1487// SSE Streaming Types
1488// ============================================================================
1489
/// Accumulator for tool call state across stream deltas.
///
/// Stored per wire index in a `HashMap<usize, ToolCallAccumulator>` and read
/// back by `build_stream_end_deltas` when the stream ends.
struct ToolCallAccumulator {
    /// Tool call id.
    id: String,
    /// Function name.
    name: String,
    /// JSON-encoded arguments string.
    // NOTE(review): presumably built by concatenating streamed fragments; the
    // accumulation code is outside this chunk.
    arguments: String,
}
1496
/// A single chunk in `OpenAI`'s SSE stream.
///
/// Both fields default when absent, so a choices-only or usage-only frame
/// still deserializes.
#[derive(Deserialize)]
struct SseChunk {
    // A usage-only frame (OpenAI's trailing chunk, OpenRouter, etc.) may omit
    // `choices` entirely; without `default` it fails to deserialize and the
    // usage frame is dropped silently.
    #[serde(default)]
    choices: Vec<SseChoice>,
    /// Usage accounting, when this frame carries it.
    #[serde(default)]
    usage: Option<SseUsage>,
}
1508
/// One entry of a stream chunk's `choices` array.
#[derive(Deserialize)]
struct SseChoice {
    /// Incremental content for this choice.
    delta: SseDelta,
    /// Raw finish reason, when this chunk carries one.
    finish_reason: Option<String>,
}
1514
/// The incremental payload of a streamed choice.
#[derive(Deserialize)]
struct SseDelta {
    /// Fragment of visible text, if any.
    content: Option<String>,
    /// Tool call fragments, if any.
    tool_calls: Option<Vec<SseToolCallDelta>>,
    /// `DeepSeek`-style streamed chain-of-thought, returned at the same level as
    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
    #[serde(default)]
    reasoning_content: Option<String>,
    /// `OpenRouter` normalizes streamed reasoning under a `reasoning` field for
    /// some upstreams; treated as an equivalent fallback to `reasoning_content`.
    #[serde(default)]
    reasoning: Option<String>,
}
1528
/// One streamed tool call fragment.
#[derive(Deserialize)]
struct SseToolCallDelta {
    /// Wire index identifying which tool call this fragment belongs to.
    index: usize,
    /// Tool call id, when this fragment carries one.
    id: Option<String>,
    /// Function name and/or arguments fragment, when present.
    function: Option<SseFunctionDelta>,
}
1535
/// The `function` object of a streamed tool call fragment.
#[derive(Deserialize)]
struct SseFunctionDelta {
    /// Function name, when this fragment carries it.
    name: Option<String>,
    /// Fragment of the JSON-encoded arguments string, when present.
    arguments: Option<String>,
}
1541
/// Token accounting carried by a stream chunk.
///
/// Counts are read through `deserialize_u32_from_number`, which accepts both
/// integers and integer-valued floats.
#[derive(Deserialize)]
struct SseUsage {
    /// Input (prompt) token count.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    prompt_tokens: u32,
    /// Output (completion) token count.
    #[serde(deserialize_with = "deserialize_u32_from_number")]
    completion_tokens: u32,
    /// Prompt token breakdown; `None` when the frame omits it.
    #[serde(default)]
    prompt_tokens_details: Option<ApiPromptTokensDetails>,
}
1551
1552fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1553where
1554    D: serde::Deserializer<'de>,
1555{
1556    #[derive(Deserialize)]
1557    #[serde(untagged)]
1558    enum NumberLike {
1559        U64(u64),
1560        F64(f64),
1561    }
1562
1563    match NumberLike::deserialize(deserializer)? {
1564        NumberLike::U64(v) => u32::try_from(v)
1565            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1566        NumberLike::F64(v) => {
1567            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1568                v.to_string().parse::<u32>().map_err(|e| {
1569                    D::Error::custom(format!(
1570                        "failed to convert integer-compatible token count {v} to u32: {e}"
1571                    ))
1572                })
1573            } else {
1574                Err(D::Error::custom(format!(
1575                    "token count must be a non-negative integer-compatible number, got {v}"
1576                )))
1577            }
1578        }
1579    }
1580}
1581
1582#[cfg(test)]
1583mod tests {
1584    use super::*;
1585    use anyhow::Context as _;
1586
1587    const OPENAI_MODELS_FIXTURE: &str = r#"{
1588      "object": "list",
1589      "data": [
1590        {"id": "gpt-5.4", "object": "model", "owned_by": "openai"},
1591        {"id": "gpt-4o", "object": "model", "owned_by": "openai"}
1592      ]
1593    }"#;
1594
1595    #[test]
1596    fn parse_models_list_reads_ids() -> anyhow::Result<()> {
1597        let models = parse_models_list(OPENAI_MODELS_FIXTURE)?;
1598        assert_eq!(models.len(), 2);
1599        assert_eq!(models[0].id, "gpt-5.4");
1600        assert_eq!(models[1].id, "gpt-4o");
1601        // The Chat Completions list endpoint reports no name or limits.
1602        assert_eq!(models[0].display_name, None);
1603        assert_eq!(models[0].context_window, None);
1604        Ok(())
1605    }
1606
1607    // ===================
1608    // Constructor Tests
1609    // ===================
1610
1611    #[test]
1612    fn test_new_creates_provider_with_custom_model() {
1613        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1614
1615        assert_eq!(provider.model(), "custom-model");
1616        assert_eq!(provider.provider(), "openai");
1617        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1618    }
1619
1620    #[test]
1621    fn test_with_base_url_creates_provider_with_custom_url() {
1622        let provider = OpenAIProvider::with_base_url(
1623            "test-api-key".to_string(),
1624            "llama3".to_string(),
1625            "http://localhost:11434/v1".to_string(),
1626        );
1627
1628        assert_eq!(provider.model(), "llama3");
1629        assert_eq!(provider.base_url, "http://localhost:11434/v1");
1630    }
1631
1632    #[test]
1633    fn test_gpt4o_factory_creates_gpt4o_provider() {
1634        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1635
1636        assert_eq!(provider.model(), MODEL_GPT4O);
1637        assert_eq!(provider.provider(), "openai");
1638    }
1639
1640    #[test]
1641    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1642        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1643
1644        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1645        assert_eq!(provider.provider(), "openai");
1646    }
1647
1648    #[test]
1649    fn test_gpt52_thinking_factory_creates_provider() {
1650        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1651
1652        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1653        assert_eq!(provider.provider(), "openai");
1654    }
1655
1656    #[test]
1657    fn test_gpt54_factory_creates_provider() {
1658        let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1659
1660        assert_eq!(provider.model(), MODEL_GPT54);
1661        assert_eq!(provider.provider(), "openai");
1662    }
1663
1664    #[test]
1665    fn test_gpt53_codex_factory_creates_provider() {
1666        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1667
1668        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1669        assert_eq!(provider.provider(), "openai");
1670    }
1671
1672    #[test]
1673    fn test_codex_factory_points_to_latest_codex_model() {
1674        let provider = OpenAIProvider::codex("test-api-key".to_string());
1675
1676        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1677        assert_eq!(provider.provider(), "openai");
1678    }
1679
1680    #[test]
1681    fn test_gpt5_factory_creates_gpt5_provider() {
1682        let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1683
1684        assert_eq!(provider.model(), MODEL_GPT5);
1685        assert_eq!(provider.provider(), "openai");
1686    }
1687
1688    #[test]
1689    fn test_gpt5_mini_factory_creates_provider() {
1690        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1691
1692        assert_eq!(provider.model(), MODEL_GPT5_MINI);
1693        assert_eq!(provider.provider(), "openai");
1694    }
1695
1696    #[test]
1697    fn test_o3_factory_creates_o3_provider() {
1698        let provider = OpenAIProvider::o3("test-api-key".to_string());
1699
1700        assert_eq!(provider.model(), MODEL_O3);
1701        assert_eq!(provider.provider(), "openai");
1702    }
1703
1704    #[test]
1705    fn test_o4_mini_factory_creates_o4_mini_provider() {
1706        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1707
1708        assert_eq!(provider.model(), MODEL_O4_MINI);
1709        assert_eq!(provider.provider(), "openai");
1710    }
1711
1712    #[test]
1713    fn test_o1_factory_creates_o1_provider() {
1714        let provider = OpenAIProvider::o1("test-api-key".to_string());
1715
1716        assert_eq!(provider.model(), MODEL_O1);
1717        assert_eq!(provider.provider(), "openai");
1718    }
1719
1720    #[test]
1721    fn test_gpt41_factory_creates_gpt41_provider() {
1722        let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1723
1724        assert_eq!(provider.model(), MODEL_GPT41);
1725        assert_eq!(provider.provider(), "openai");
1726    }
1727
1728    #[test]
1729    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1730        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1731
1732        assert_eq!(provider.model(), "kimi-custom");
1733        assert_eq!(provider.base_url, BASE_URL_KIMI);
1734        assert_eq!(provider.provider(), "openai");
1735    }
1736
1737    #[test]
1738    fn test_kimi_k2_5_factory_creates_provider() {
1739        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1740
1741        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1742        assert_eq!(provider.base_url, BASE_URL_KIMI);
1743        assert_eq!(provider.provider(), "openai");
1744    }
1745
1746    #[test]
1747    fn test_kimi_k2_thinking_factory_creates_provider() {
1748        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1749
1750        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1751        assert_eq!(provider.base_url, BASE_URL_KIMI);
1752        assert_eq!(provider.provider(), "openai");
1753    }
1754
1755    #[test]
1756    fn test_zai_factory_creates_provider_with_zai_base_url() {
1757        let provider = OpenAIProvider::zai("test-api-key".to_string(), "glm-custom".to_string());
1758
1759        assert_eq!(provider.model(), "glm-custom");
1760        assert_eq!(provider.base_url, BASE_URL_ZAI);
1761        assert_eq!(provider.provider(), "openai");
1762    }
1763
1764    #[test]
1765    fn test_zai_glm5_factory_creates_provider() {
1766        let provider = OpenAIProvider::zai_glm5("test-api-key".to_string());
1767
1768        assert_eq!(provider.model(), MODEL_ZAI_GLM5);
1769        assert_eq!(provider.base_url, BASE_URL_ZAI);
1770        assert_eq!(provider.provider(), "openai");
1771    }
1772
1773    #[test]
1774    fn test_minimax_factory_creates_provider_with_minimax_base_url() {
1775        let provider =
1776            OpenAIProvider::minimax("test-api-key".to_string(), "minimax-custom".to_string());
1777
1778        assert_eq!(provider.model(), "minimax-custom");
1779        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1780        assert_eq!(provider.provider(), "openai");
1781    }
1782
1783    #[test]
1784    fn test_minimax_m2_5_factory_creates_provider() {
1785        let provider = OpenAIProvider::minimax_m2_5("test-api-key".to_string());
1786
1787        assert_eq!(provider.model(), MODEL_MINIMAX_M2_5);
1788        assert_eq!(provider.base_url, BASE_URL_MINIMAX);
1789        assert_eq!(provider.provider(), "openai");
1790    }
1791
1792    // ===================
1793    // Model Constants Tests
1794    // ===================
1795
1796    #[test]
1797    fn test_model_constants_have_expected_values() {
1798        // GPT-5.4 / GPT-5.3 Codex
1799        assert_eq!(MODEL_GPT54, "gpt-5.4");
1800        assert_eq!(MODEL_GPT53_CODEX, "gpt-5.3-codex");
1801        // GPT-5.2 series
1802        assert_eq!(MODEL_GPT52_INSTANT, "gpt-5.2-instant");
1803        assert_eq!(MODEL_GPT52_THINKING, "gpt-5.2-thinking");
1804        assert_eq!(MODEL_GPT52_PRO, "gpt-5.2-pro");
1805        assert_eq!(MODEL_GPT52_CODEX, "gpt-5.2-codex");
1806        // GPT-5 series
1807        assert_eq!(MODEL_GPT5, "gpt-5");
1808        assert_eq!(MODEL_GPT5_MINI, "gpt-5-mini");
1809        assert_eq!(MODEL_GPT5_NANO, "gpt-5-nano");
1810        // o-series
1811        assert_eq!(MODEL_O3, "o3");
1812        assert_eq!(MODEL_O3_MINI, "o3-mini");
1813        assert_eq!(MODEL_O4_MINI, "o4-mini");
1814        assert_eq!(MODEL_O1, "o1");
1815        assert_eq!(MODEL_O1_MINI, "o1-mini");
1816        // GPT-4.1 series
1817        assert_eq!(MODEL_GPT41, "gpt-4.1");
1818        assert_eq!(MODEL_GPT41_MINI, "gpt-4.1-mini");
1819        assert_eq!(MODEL_GPT41_NANO, "gpt-4.1-nano");
1820        // GPT-4o series
1821        assert_eq!(MODEL_GPT4O, "gpt-4o");
1822        assert_eq!(MODEL_GPT4O_MINI, "gpt-4o-mini");
1823        // OpenAI-compatible vendor defaults
1824        assert_eq!(MODEL_KIMI_K2_5, "kimi-k2.5");
1825        assert_eq!(MODEL_KIMI_K2_THINKING, "kimi-k2-thinking");
1826        assert_eq!(MODEL_ZAI_GLM5, "glm-5");
1827        assert_eq!(MODEL_MINIMAX_M2_5, "MiniMax-M2.5");
1828        assert_eq!(BASE_URL_KIMI, "https://api.moonshot.ai/v1");
1829        assert_eq!(BASE_URL_ZAI, "https://api.z.ai/api/paas/v4");
1830        assert_eq!(BASE_URL_MINIMAX, "https://api.minimax.io/v1");
1831    }
1832
1833    // ===================
1834    // Clone Tests
1835    // ===================
1836
1837    #[test]
1838    fn test_provider_is_cloneable() {
1839        let provider = OpenAIProvider::new("test-api-key".to_string(), "test-model".to_string());
1840        let cloned = provider.clone();
1841
1842        assert_eq!(provider.model(), cloned.model());
1843        assert_eq!(provider.provider(), cloned.provider());
1844        assert_eq!(provider.base_url, cloned.base_url);
1845    }
1846
1847    // ===================
1848    // API Type Serialization Tests
1849    // ===================
1850
1851    #[test]
1852    fn test_api_role_serialization() {
1853        let system_role = ApiRole::System;
1854        let user_role = ApiRole::User;
1855        let assistant_role = ApiRole::Assistant;
1856        let tool_role = ApiRole::Tool;
1857
1858        assert_eq!(serde_json::to_string(&system_role).unwrap(), "\"system\"");
1859        assert_eq!(serde_json::to_string(&user_role).unwrap(), "\"user\"");
1860        assert_eq!(
1861            serde_json::to_string(&assistant_role).unwrap(),
1862            "\"assistant\""
1863        );
1864        assert_eq!(serde_json::to_string(&tool_role).unwrap(), "\"tool\"");
1865    }
1866
1867    #[test]
1868    fn test_api_message_serialization_simple() {
1869        let message = ApiMessage {
1870            role: ApiRole::User,
1871            content: Some("Hello, world!".to_string()),
1872            reasoning_content: None,
1873            tool_calls: None,
1874            tool_call_id: None,
1875        };
1876
1877        let json = serde_json::to_string(&message).unwrap();
1878        assert!(json.contains("\"role\":\"user\""));
1879        assert!(json.contains("\"content\":\"Hello, world!\""));
1880        // Optional fields should be omitted
1881        assert!(!json.contains("tool_calls"));
1882        assert!(!json.contains("tool_call_id"));
1883    }
1884
1885    #[test]
1886    fn test_api_message_serialization_with_tool_calls() {
1887        let message = ApiMessage {
1888            role: ApiRole::Assistant,
1889            content: Some("Let me help.".to_string()),
1890            reasoning_content: None,
1891            tool_calls: Some(vec![ApiToolCall {
1892                id: "call_123".to_string(),
1893                r#type: "function".to_string(),
1894                function: ApiFunctionCall {
1895                    name: "read_file".to_string(),
1896                    arguments: "{\"path\": \"/test.txt\"}".to_string(),
1897                },
1898            }]),
1899            tool_call_id: None,
1900        };
1901
1902        let json = serde_json::to_string(&message).unwrap();
1903        assert!(json.contains("\"role\":\"assistant\""));
1904        assert!(json.contains("\"tool_calls\""));
1905        assert!(json.contains("\"id\":\"call_123\""));
1906        assert!(json.contains("\"type\":\"function\""));
1907        assert!(json.contains("\"name\":\"read_file\""));
1908    }
1909
1910    #[test]
1911    fn test_api_tool_message_serialization() {
1912        let message = ApiMessage {
1913            role: ApiRole::Tool,
1914            content: Some("File contents here".to_string()),
1915            reasoning_content: None,
1916            tool_calls: None,
1917            tool_call_id: Some("call_123".to_string()),
1918        };
1919
1920        let json = serde_json::to_string(&message).unwrap();
1921        assert!(json.contains("\"role\":\"tool\""));
1922        assert!(json.contains("\"tool_call_id\":\"call_123\""));
1923        assert!(json.contains("\"content\":\"File contents here\""));
1924    }
1925
1926    #[test]
1927    fn test_api_tool_serialization() {
1928        let tool = ApiTool {
1929            r#type: "function".to_string(),
1930            function: ApiFunction {
1931                name: "test_tool".to_string(),
1932                description: "A test tool".to_string(),
1933                parameters: serde_json::json!({
1934                    "type": "object",
1935                    "properties": {
1936                        "arg": {"type": "string"}
1937                    }
1938                }),
1939            },
1940        };
1941
1942        let json = serde_json::to_string(&tool).unwrap();
1943        assert!(json.contains("\"type\":\"function\""));
1944        assert!(json.contains("\"name\":\"test_tool\""));
1945        assert!(json.contains("\"description\":\"A test tool\""));
1946        assert!(json.contains("\"parameters\""));
1947    }
1948
1949    // ===================
1950    // API Type Deserialization Tests
1951    // ===================
1952
1953    #[test]
1954    fn test_api_response_deserialization() {
1955        let json = r#"{
1956            "id": "chatcmpl-123",
1957            "choices": [
1958                {
1959                    "message": {
1960                        "content": "Hello!"
1961                    },
1962                    "finish_reason": "stop"
1963                }
1964            ],
1965            "model": "gpt-4o",
1966            "usage": {
1967                "prompt_tokens": 100,
1968                "completion_tokens": 50
1969            }
1970        }"#;
1971
1972        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
1973        assert_eq!(response.id, "chatcmpl-123");
1974        assert_eq!(response.model, "gpt-4o");
1975        assert_eq!(response.usage.prompt_tokens, 100);
1976        assert_eq!(response.usage.completion_tokens, 50);
1977        assert_eq!(response.choices.len(), 1);
1978        assert_eq!(
1979            response.choices[0].message.content,
1980            Some("Hello!".to_string())
1981        );
1982    }
1983
1984    #[test]
1985    fn test_api_response_with_tool_calls_deserialization() {
1986        let json = r#"{
1987            "id": "chatcmpl-456",
1988            "choices": [
1989                {
1990                    "message": {
1991                        "content": null,
1992                        "tool_calls": [
1993                            {
1994                                "id": "call_abc",
1995                                "type": "function",
1996                                "function": {
1997                                    "name": "read_file",
1998                                    "arguments": "{\"path\": \"test.txt\"}"
1999                                }
2000                            }
2001                        ]
2002                    },
2003                    "finish_reason": "tool_calls"
2004                }
2005            ],
2006            "model": "gpt-4o",
2007            "usage": {
2008                "prompt_tokens": 150,
2009                "completion_tokens": 30
2010            }
2011        }"#;
2012
2013        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
2014        let tool_calls = response.choices[0].message.tool_calls.as_ref().unwrap();
2015        assert_eq!(tool_calls.len(), 1);
2016        assert_eq!(tool_calls[0].id, "call_abc");
2017        assert_eq!(tool_calls[0].function.name, "read_file");
2018    }
2019
2020    #[test]
2021    fn test_api_response_with_unknown_finish_reason_deserialization() {
2022        let json = r#"{
2023            "id": "chatcmpl-789",
2024            "choices": [
2025                {
2026                    "message": {
2027                        "content": "ok"
2028                    },
2029                    "finish_reason": "vendor_custom_reason"
2030                }
2031            ],
2032            "model": "glm-5",
2033            "usage": {
2034                "prompt_tokens": 10,
2035                "completion_tokens": 5
2036            }
2037        }"#;
2038
2039        let response: ApiChatResponse = serde_json::from_str(json).unwrap();
2040        assert_eq!(
2041            response.choices[0].finish_reason.as_deref(),
2042            Some("vendor_custom_reason")
2043        );
2044        assert_eq!(
2045            map_finish_reason(response.choices[0].finish_reason.as_deref().unwrap()),
2046            StopReason::StopSequence
2047        );
2048    }
2049
2050    #[test]
2051    fn test_map_finish_reason_covers_vendor_specific_values() {
2052        assert_eq!(map_finish_reason("stop"), StopReason::EndTurn);
2053        assert_eq!(map_finish_reason("tool_calls"), StopReason::ToolUse);
2054        assert_eq!(map_finish_reason("length"), StopReason::MaxTokens);
2055        assert_eq!(
2056            map_finish_reason("content_filter"),
2057            StopReason::StopSequence
2058        );
2059        assert_eq!(map_finish_reason("sensitive"), StopReason::Refusal);
2060        assert_eq!(map_finish_reason("network_error"), StopReason::StopSequence);
2061        assert_eq!(
2062            map_finish_reason("some_new_reason"),
2063            StopReason::StopSequence
2064        );
2065    }
2066
2067    // ===================
2068    // Message Conversion Tests
2069    // ===================
2070
2071    #[test]
2072    fn test_build_api_messages_with_system() {
2073        let request = ChatRequest {
2074            system: "You are helpful.".to_string(),
2075            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
2076            tools: None,
2077            max_tokens: 1024,
2078            max_tokens_explicit: true,
2079            session_id: None,
2080            cached_content: None,
2081            thinking: None,
2082            tool_choice: None,
2083            response_format: None,
2084            cache: None,
2085        };
2086
2087        let api_messages = build_api_messages(&request);
2088        assert_eq!(api_messages.len(), 2);
2089        assert_eq!(api_messages[0].role, ApiRole::System);
2090        assert_eq!(
2091            api_messages[0].content,
2092            Some("You are helpful.".to_string())
2093        );
2094        assert_eq!(api_messages[1].role, ApiRole::User);
2095        assert_eq!(api_messages[1].content, Some("Hello".to_string()));
2096    }
2097
2098    #[test]
2099    fn test_build_api_messages_empty_system() {
2100        let request = ChatRequest {
2101            system: String::new(),
2102            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
2103            tools: None,
2104            max_tokens: 1024,
2105            max_tokens_explicit: true,
2106            session_id: None,
2107            cached_content: None,
2108            thinking: None,
2109            tool_choice: None,
2110            response_format: None,
2111            cache: None,
2112        };
2113
2114        let api_messages = build_api_messages(&request);
2115        assert_eq!(api_messages.len(), 1);
2116        assert_eq!(api_messages[0].role, ApiRole::User);
2117    }
2118
2119    fn request_with_messages(messages: Vec<agent_sdk_foundation::llm::Message>) -> ChatRequest {
2120        ChatRequest {
2121            system: String::new(),
2122            messages,
2123            tools: None,
2124            max_tokens: 1024,
2125            max_tokens_explicit: true,
2126            session_id: None,
2127            cached_content: None,
2128            thinking: None,
2129            tool_choice: None,
2130            response_format: None,
2131            cache: None,
2132        }
2133    }
2134
2135    #[test]
2136    fn test_build_api_messages_echoes_assistant_reasoning_content_on_tool_call()
2137    -> anyhow::Result<()> {
2138        // DeepSeek V4 thinking-mode requires the prior assistant turn's
2139        // reasoning to be echoed back as `reasoning_content` ONLY on a turn
2140        // that also performed a tool call, or the API 400s.
2141        let request = request_with_messages(vec![
2142            agent_sdk_foundation::llm::Message::user("What is the weather?"),
2143            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
2144                ContentBlock::Thinking {
2145                    thinking: "I should call the weather tool.".to_string(),
2146                    signature: None,
2147                },
2148                ContentBlock::ToolUse {
2149                    id: "call_1".to_string(),
2150                    name: "get_weather".to_string(),
2151                    input: serde_json::json!({"city": "Paris"}),
2152                    thought_signature: None,
2153                },
2154            ]),
2155        ]);
2156
2157        let api_messages = build_api_messages(&request);
2158        let assistant = api_messages
2159            .iter()
2160            .find(|m| m.role == ApiRole::Assistant)
2161            .context("assistant message present")?;
2162        assert!(assistant.tool_calls.is_some());
2163        assert_eq!(
2164            assistant.reasoning_content,
2165            Some("I should call the weather tool.".to_string())
2166        );
2167        Ok(())
2168    }
2169
2170    #[test]
2171    fn test_build_api_messages_reasoning_content_serializes_on_tool_call_turn() -> anyhow::Result<()>
2172    {
2173        let request = request_with_messages(vec![
2174            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
2175                ContentBlock::Thinking {
2176                    thinking: "thinking out loud".to_string(),
2177                    signature: None,
2178                },
2179                ContentBlock::ToolUse {
2180                    id: "call_1".to_string(),
2181                    name: "do_thing".to_string(),
2182                    input: serde_json::json!({}),
2183                    thought_signature: None,
2184                },
2185            ]),
2186        ]);
2187
2188        let api_messages = build_api_messages(&request);
2189        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2190        assert!(json.contains("\"reasoning_content\":\"thinking out loud\""));
2191        Ok(())
2192    }
2193
2194    #[test]
2195    fn test_build_api_messages_reasoning_only_turn_is_not_echoed() -> anyhow::Result<()> {
2196        // A reasoning-only assistant turn (no visible text, no tool call) must
2197        // NOT carry reasoning_content: legacy `deepseek-reasoner` 400s if
2198        // reasoning_content appears in input, and DeepSeek V4 thinking-mode only
2199        // needs it on tool-call turns. With no other payload the turn collapses
2200        // to nothing and is dropped entirely.
2201        let request = request_with_messages(vec![
2202            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
2203                ContentBlock::Thinking {
2204                    thinking: "pondering".to_string(),
2205                    signature: None,
2206                },
2207            ]),
2208        ]);
2209
2210        let api_messages = build_api_messages(&request);
2211        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2212        assert!(!json.contains("reasoning_content"));
2213        assert!(api_messages.is_empty());
2214        Ok(())
2215    }
2216
2217    #[test]
2218    fn test_build_api_messages_reasoning_with_text_no_tool_call_is_not_echoed() -> anyhow::Result<()>
2219    {
2220        // An assistant turn carrying reasoning + visible text but NO tool call
2221        // is emitted for its text, but its reasoning is NOT echoed back.
2222        let request = request_with_messages(vec![
2223            agent_sdk_foundation::llm::Message::user("What is 2+2?"),
2224            agent_sdk_foundation::llm::Message::assistant_with_content(vec![
2225                ContentBlock::Thinking {
2226                    thinking: "Let me add 2 and 2.".to_string(),
2227                    signature: None,
2228                },
2229                ContentBlock::Text {
2230                    text: "4".to_string(),
2231                },
2232            ]),
2233            agent_sdk_foundation::llm::Message::user("And 3+3?"),
2234        ]);
2235
2236        let api_messages = build_api_messages(&request);
2237        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2238        assert!(!json.contains("reasoning_content"));
2239        let assistant = api_messages
2240            .iter()
2241            .find(|m| m.role == ApiRole::Assistant)
2242            .context("assistant message present")?;
2243        assert_eq!(assistant.content, Some("4".to_string()));
2244        assert_eq!(assistant.reasoning_content, None);
2245        Ok(())
2246    }
2247
2248    #[test]
2249    fn test_build_api_messages_normal_path_has_no_reasoning_content() -> anyhow::Result<()> {
2250        // Normal path unchanged: an assistant turn with no Thinking block must
2251        // not attach reasoning_content.
2252        let request = request_with_messages(vec![
2253            agent_sdk_foundation::llm::Message::user("hi"),
2254            agent_sdk_foundation::llm::Message::assistant_with_content(vec![ContentBlock::Text {
2255                text: "hello".to_string(),
2256            }]),
2257        ]);
2258
2259        let api_messages = build_api_messages(&request);
2260        let json = serde_json::to_string(&api_messages).context("serialize api messages")?;
2261        assert!(!json.contains("reasoning_content"));
2262        let assistant = api_messages
2263            .iter()
2264            .find(|m| m.role == ApiRole::Assistant)
2265            .context("assistant message present")?;
2266        assert_eq!(assistant.reasoning_content, None);
2267        Ok(())
2268    }
2269
2270    #[test]
2271    fn test_build_api_messages_does_not_attach_reasoning_to_user_blocks() {
2272        // A user turn carrying a Thinking block (unusual, but possible) must not
2273        // be turned into a reasoning_content echo.
2274        let request =
2275            request_with_messages(vec![agent_sdk_foundation::llm::Message::user_with_content(
2276                vec![
2277                    ContentBlock::Thinking {
2278                        thinking: "user-side thinking".to_string(),
2279                        signature: None,
2280                    },
2281                    ContentBlock::Text {
2282                        text: "question".to_string(),
2283                    },
2284                ],
2285            )]);
2286
2287        let api_messages = build_api_messages(&request);
2288        assert_eq!(api_messages.len(), 1);
2289        assert_eq!(api_messages[0].role, ApiRole::User);
2290        assert_eq!(api_messages[0].reasoning_content, None);
2291    }
2292
2293    #[test]
2294    fn test_convert_tool() {
2295        let tool = agent_sdk_foundation::llm::Tool {
2296            name: "test_tool".to_string(),
2297            description: "A test tool".to_string(),
2298            input_schema: serde_json::json!({"type": "object"}),
2299            display_name: "Test Tool".to_string(),
2300            tier: agent_sdk_foundation::ToolTier::Observe,
2301        };
2302
2303        let api_tool = convert_tool(tool);
2304        assert_eq!(api_tool.r#type, "function");
2305        assert_eq!(api_tool.function.name, "test_tool");
2306        assert_eq!(api_tool.function.description, "A test tool");
2307    }
2308
2309    #[test]
2310    fn test_build_content_blocks_text_only() {
2311        let message = ApiResponseMessage {
2312            content: Some("Hello!".to_string()),
2313            tool_calls: None,
2314            reasoning_content: None,
2315            reasoning: None,
2316        };
2317
2318        let blocks = build_content_blocks(&message);
2319        assert_eq!(blocks.len(), 1);
2320        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Hello!"));
2321    }
2322
2323    #[test]
2324    fn test_build_content_blocks_with_tool_calls() {
2325        let message = ApiResponseMessage {
2326            content: Some("Let me help.".to_string()),
2327            tool_calls: Some(vec![ApiResponseToolCall {
2328                id: "call_123".to_string(),
2329                function: ApiResponseFunctionCall {
2330                    name: "read_file".to_string(),
2331                    arguments: "{\"path\": \"test.txt\"}".to_string(),
2332                },
2333            }]),
2334            reasoning_content: None,
2335            reasoning: None,
2336        };
2337
2338        let blocks = build_content_blocks(&message);
2339        assert_eq!(blocks.len(), 2);
2340        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Let me help."));
2341        assert!(
2342            matches!(&blocks[1], ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file")
2343        );
2344    }
2345
2346    #[test]
2347    fn test_build_content_blocks_falls_back_to_reasoning_content_when_content_empty() {
2348        // DeepSeek-style: answer / usable output arrives in reasoning_content
2349        // while content is null. Without the fallback this dropped all output.
2350        let message = ApiResponseMessage {
2351            content: None,
2352            tool_calls: None,
2353            reasoning_content: Some("The answer is 42.".to_string()),
2354            reasoning: None,
2355        };
2356
2357        let blocks = build_content_blocks(&message);
2358        assert_eq!(blocks.len(), 1);
2359        assert!(
2360            matches!(&blocks[0], ContentBlock::Thinking { thinking, signature } if thinking == "The answer is 42." && signature.is_none())
2361        );
2362    }
2363
2364    #[test]
2365    fn test_build_content_blocks_falls_back_to_reasoning_field() {
2366        // Some OpenRouter upstreams normalize reasoning under `reasoning`.
2367        let message = ApiResponseMessage {
2368            content: Some(String::new()),
2369            tool_calls: None,
2370            reasoning_content: None,
2371            reasoning: Some("Considering options...".to_string()),
2372        };
2373
2374        let blocks = build_content_blocks(&message);
2375        assert_eq!(blocks.len(), 1);
2376        assert!(
2377            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "Considering options...")
2378        );
2379    }
2380
2381    #[test]
2382    fn test_build_content_blocks_prefers_reasoning_content_over_reasoning() {
2383        let message = ApiResponseMessage {
2384            content: None,
2385            tool_calls: None,
2386            reasoning_content: Some("primary".to_string()),
2387            reasoning: Some("secondary".to_string()),
2388        };
2389
2390        let blocks = build_content_blocks(&message);
2391        assert_eq!(blocks.len(), 1);
2392        assert!(
2393            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "primary")
2394        );
2395    }
2396
2397    #[test]
2398    fn test_build_content_blocks_does_not_add_reasoning_when_content_present() {
2399        // The normal content-present case must be unchanged: reasoning is NOT
2400        // surfaced as a Thinking block when there is usable text content.
2401        let message = ApiResponseMessage {
2402            content: Some("Final answer.".to_string()),
2403            tool_calls: None,
2404            reasoning_content: Some("internal chain of thought".to_string()),
2405            reasoning: None,
2406        };
2407
2408        let blocks = build_content_blocks(&message);
2409        assert_eq!(blocks.len(), 1);
2410        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "Final answer."));
2411    }
2412
2413    #[test]
2414    fn test_build_content_blocks_reasoning_fallback_with_tool_calls() {
2415        // Empty content + reasoning + a tool call: surface the reasoning AND the
2416        // tool call (reasoning model under tight max_tokens that still tool-called).
2417        let message = ApiResponseMessage {
2418            content: None,
2419            tool_calls: Some(vec![ApiResponseToolCall {
2420                id: "call_1".to_string(),
2421                function: ApiResponseFunctionCall {
2422                    name: "search".to_string(),
2423                    arguments: "{}".to_string(),
2424                },
2425            }]),
2426            reasoning_content: Some("I should search.".to_string()),
2427            reasoning: None,
2428        };
2429
2430        let blocks = build_content_blocks(&message);
2431        assert_eq!(blocks.len(), 2);
2432        assert!(
2433            matches!(&blocks[0], ContentBlock::Thinking { thinking, .. } if thinking == "I should search.")
2434        );
2435        assert!(matches!(&blocks[1], ContentBlock::ToolUse { name, .. } if name == "search"));
2436    }
2437
2438    #[test]
2439    fn test_build_content_blocks_empty_message_yields_no_blocks() {
2440        // Genuine truncation with no reasoning text: still produce nothing
2441        // (behavior unchanged for the empty case).
2442        let message = ApiResponseMessage {
2443            content: None,
2444            tool_calls: None,
2445            reasoning_content: None,
2446            reasoning: None,
2447        };
2448
2449        let blocks = build_content_blocks(&message);
2450        assert!(blocks.is_empty());
2451    }
2452
2453    #[test]
2454    fn test_api_response_message_deserializes_reasoning_content() {
2455        let json = r#"{
2456            "content": null,
2457            "reasoning_content": "step by step"
2458        }"#;
2459
2460        let message: ApiResponseMessage = serde_json::from_str(json).unwrap();
2461        assert_eq!(reasoning_text(&message), Some("step by step"));
2462        assert!(message.content.is_none());
2463    }
2464
2465    // ===================
2466    // SSE Streaming Type Tests
2467    // ===================
2468
2469    #[test]
2470    fn test_sse_chunk_text_delta_deserialization() {
2471        let json = r#"{
2472            "choices": [{
2473                "delta": {
2474                    "content": "Hello"
2475                },
2476                "finish_reason": null
2477            }]
2478        }"#;
2479
2480        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2481        assert_eq!(chunk.choices.len(), 1);
2482        assert_eq!(chunk.choices[0].delta.content, Some("Hello".to_string()));
2483        assert!(chunk.choices[0].finish_reason.is_none());
2484    }
2485
2486    #[test]
2487    fn test_sse_chunk_tool_call_delta_deserialization() {
2488        let json = r#"{
2489            "choices": [{
2490                "delta": {
2491                    "tool_calls": [{
2492                        "index": 0,
2493                        "id": "call_abc",
2494                        "function": {
2495                            "name": "read_file",
2496                            "arguments": ""
2497                        }
2498                    }]
2499                },
2500                "finish_reason": null
2501            }]
2502        }"#;
2503
2504        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2505        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
2506        assert_eq!(tool_calls.len(), 1);
2507        assert_eq!(tool_calls[0].index, 0);
2508        assert_eq!(tool_calls[0].id, Some("call_abc".to_string()));
2509        assert_eq!(
2510            tool_calls[0].function.as_ref().unwrap().name,
2511            Some("read_file".to_string())
2512        );
2513    }
2514
2515    #[test]
2516    fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
2517        let json = r#"{
2518            "choices": [{
2519                "delta": {
2520                    "tool_calls": [{
2521                        "index": 0,
2522                        "function": {
2523                            "arguments": "{\"path\":"
2524                        }
2525                    }]
2526                },
2527                "finish_reason": null
2528            }]
2529        }"#;
2530
2531        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2532        let tool_calls = chunk.choices[0].delta.tool_calls.as_ref().unwrap();
2533        assert_eq!(tool_calls[0].id, None);
2534        assert_eq!(
2535            tool_calls[0].function.as_ref().unwrap().arguments,
2536            Some("{\"path\":".to_string())
2537        );
2538    }
2539
2540    #[test]
2541    fn test_sse_chunk_with_finish_reason_deserialization() {
2542        let json = r#"{
2543            "choices": [{
2544                "delta": {},
2545                "finish_reason": "stop"
2546            }]
2547        }"#;
2548
2549        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2550        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
2551    }
2552
2553    #[test]
2554    fn test_sse_chunk_with_usage_deserialization() {
2555        let json = r#"{
2556            "choices": [{
2557                "delta": {},
2558                "finish_reason": "stop"
2559            }],
2560            "usage": {
2561                "prompt_tokens": 100,
2562                "completion_tokens": 50
2563            }
2564        }"#;
2565
2566        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2567        let usage = chunk.usage.unwrap();
2568        assert_eq!(usage.prompt_tokens, 100);
2569        assert_eq!(usage.completion_tokens, 50);
2570    }
2571
2572    #[test]
2573    fn test_sse_chunk_with_float_usage_deserialization() {
2574        let json = r#"{
2575            "choices": [{
2576                "delta": {},
2577                "finish_reason": "stop"
2578            }],
2579            "usage": {
2580                "prompt_tokens": 100.0,
2581                "completion_tokens": 50.0
2582            }
2583        }"#;
2584
2585        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2586        let usage = chunk.usage.unwrap();
2587        assert_eq!(usage.prompt_tokens, 100);
2588        assert_eq!(usage.completion_tokens, 50);
2589    }
2590
2591    #[test]
2592    fn test_api_usage_deserializes_integer_compatible_numbers() {
2593        let json = r#"{
2594            "prompt_tokens": 42.0,
2595            "completion_tokens": 7
2596        }"#;
2597
2598        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2599        assert_eq!(usage.prompt_tokens, 42);
2600        assert_eq!(usage.completion_tokens, 7);
2601    }
2602
2603    #[test]
2604    fn test_api_usage_deserializes_cached_tokens() {
2605        let json = r#"{
2606            "prompt_tokens": 42,
2607            "completion_tokens": 7,
2608            "prompt_tokens_details": {
2609                "cached_tokens": 10
2610            }
2611        }"#;
2612
2613        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2614        assert_eq!(usage.prompt_tokens, 42);
2615        assert_eq!(usage.completion_tokens, 7);
2616        assert_eq!(usage.prompt_tokens_details.unwrap().cached_tokens, 10);
2617    }
2618
2619    #[test]
2620    fn test_process_sse_data_maps_cached_tokens_to_cache_read_usage() {
2621        let results = process_sse_data(
2622            r#"{
2623                "choices": [],
2624                "usage": {
2625                    "prompt_tokens": 42,
2626                    "completion_tokens": 7,
2627                    "prompt_tokens_details": {
2628                        "cached_tokens": 10
2629                    }
2630                }
2631            }"#,
2632        );
2633
2634        assert!(matches!(
2635            results.as_slice(),
2636            [SseProcessResult::Usage(Usage {
2637                input_tokens: 42,
2638                output_tokens: 7,
2639                cached_input_tokens: 10,
2640                cache_creation_input_tokens: 0,
2641            })]
2642        ));
2643    }
2644
2645    #[test]
2646    fn test_sse_delta_deserializes_reasoning_fields() -> anyhow::Result<()> {
2647        // The streaming delta struct must accept DeepSeek `reasoning_content`
2648        // and OpenRouter-normalized `reasoning` so reasoning tokens are not
2649        // dropped on deserialization.
2650        let chunk: SseChunk = serde_json::from_str(
2651            r#"{
2652                "choices": [{
2653                    "delta": {
2654                        "reasoning_content": "step one"
2655                    },
2656                    "finish_reason": null
2657                }]
2658            }"#,
2659        )
2660        .context("deserialize sse chunk")?;
2661        assert_eq!(
2662            chunk.choices[0].delta.reasoning_content,
2663            Some("step one".to_string())
2664        );
2665        assert!(chunk.choices[0].delta.content.is_none());
2666        Ok(())
2667    }
2668
2669    #[test]
2670    fn test_process_sse_data_emits_thinking_delta_from_reasoning_content() {
2671        // Reasoning-model fallback under streaming: a delta whose visible
2672        // `content` is absent but whose `reasoning_content` carries tokens must
2673        // surface as a ThinkingDelta, mirroring the non-streaming fallback so the
2674        // output is not silently dropped.
2675        let results = process_sse_data(
2676            r#"{
2677                "choices": [{
2678                    "delta": { "reasoning_content": "thinking..." },
2679                    "finish_reason": null
2680                }]
2681            }"#,
2682        );
2683
2684        assert!(matches!(
2685            results.as_slice(),
2686            [SseProcessResult::ThinkingDelta(text)] if text == "thinking..."
2687        ));
2688    }
2689
2690    #[test]
2691    fn test_process_sse_data_emits_thinking_delta_from_reasoning_field() {
2692        // OpenRouter-normalized `reasoning` field is an equivalent fallback.
2693        let results = process_sse_data(
2694            r#"{
2695                "choices": [{
2696                    "delta": { "reasoning": "pondering" },
2697                    "finish_reason": null
2698                }]
2699            }"#,
2700        );
2701
2702        assert!(matches!(
2703            results.as_slice(),
2704            [SseProcessResult::ThinkingDelta(text)] if text == "pondering"
2705        ));
2706    }
2707
2708    #[test]
2709    fn test_process_sse_data_prefers_text_content_over_reasoning() {
2710        // When visible `content` is present, it takes precedence and the
2711        // reasoning fallback does not fire (mirrors non-streaming behavior).
2712        let results = process_sse_data(
2713            r#"{
2714                "choices": [{
2715                    "delta": {
2716                        "content": "answer",
2717                        "reasoning_content": "ignored"
2718                    },
2719                    "finish_reason": null
2720                }]
2721            }"#,
2722        );
2723
2724        assert!(matches!(
2725            results.as_slice(),
2726            [SseProcessResult::TextDelta(text)] if text == "answer"
2727        ));
2728    }
2729
2730    #[test]
2731    fn test_process_sse_data_empty_content_falls_back_to_reasoning() {
2732        // An explicitly empty `content` string must still trigger the reasoning
2733        // fallback rather than emitting an empty TextDelta.
2734        let results = process_sse_data(
2735            r#"{
2736                "choices": [{
2737                    "delta": {
2738                        "content": "",
2739                        "reasoning_content": "fallback"
2740                    },
2741                    "finish_reason": null
2742                }]
2743            }"#,
2744        );
2745
2746        assert!(matches!(
2747            results.as_slice(),
2748            [SseProcessResult::ThinkingDelta(text)] if text == "fallback"
2749        ));
2750    }
2751
2752    #[test]
2753    fn test_api_usage_rejects_fractional_numbers() {
2754        let json = r#"{
2755            "prompt_tokens": 42.5,
2756            "completion_tokens": 7
2757        }"#;
2758
2759        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
2760        assert!(usage.is_err());
2761    }
2762
2763    #[test]
2764    fn test_use_max_tokens_alias_for_vendor_urls() {
2765        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
2766        assert!(use_max_tokens_alias(BASE_URL_KIMI));
2767        assert!(use_max_tokens_alias(BASE_URL_ZAI));
2768        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
2769    }
2770
2771    #[test]
2772    fn test_requires_responses_api_only_for_legacy_codex_model() {
2773        assert!(requires_responses_api(MODEL_GPT52_CODEX));
2774        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
2775        assert!(!requires_responses_api(MODEL_GPT54));
2776    }
2777
2778    #[test]
2779    fn test_should_use_responses_api_for_official_agentic_requests() {
2780        let request = ChatRequest {
2781            system: String::new(),
2782            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
2783            tools: Some(vec![agent_sdk_foundation::llm::Tool {
2784                name: "read_file".to_string(),
2785                description: "Read a file".to_string(),
2786                input_schema: serde_json::json!({"type": "object"}),
2787                display_name: "Read File".to_string(),
2788                tier: agent_sdk_foundation::ToolTier::Observe,
2789            }]),
2790            max_tokens: 1024,
2791            max_tokens_explicit: true,
2792            session_id: Some("thread-1".to_string()),
2793            cached_content: None,
2794            thinking: None,
2795            tool_choice: None,
2796            response_format: None,
2797            cache: None,
2798        };
2799
2800        assert!(should_use_responses_api(
2801            DEFAULT_BASE_URL,
2802            MODEL_GPT54,
2803            &request
2804        ));
2805        assert!(!should_use_responses_api(
2806            BASE_URL_KIMI,
2807            MODEL_GPT54,
2808            &request
2809        ));
2810    }
2811
2812    #[test]
2813    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
2814        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
2815        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
2816    }
2817
2818    #[test]
2819    fn test_build_api_reasoning_uses_explicit_effort() {
2820        let reasoning =
2821            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
2822        assert!(matches!(reasoning.effort, ReasoningEffort::High));
2823    }
2824
2825    #[test]
2826    fn test_build_api_reasoning_omits_adaptive_without_effort() {
2827        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
2828    }
2829
2830    #[test]
2831    fn test_openai_rejects_adaptive_thinking() {
2832        let provider = OpenAIProvider::gpt54("test-key".to_string());
2833        let error = provider
2834            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
2835            .unwrap_err();
2836        assert!(
2837            error
2838                .to_string()
2839                .contains("adaptive thinking is not supported")
2840        );
2841    }
2842
2843    #[test]
2844    fn test_openai_non_reasoning_models_reject_thinking() {
2845        let provider = OpenAIProvider::gpt4o("test-key".to_string());
2846        let error = provider
2847            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
2848            .unwrap_err();
2849        assert!(error.to_string().contains("thinking is not supported"));
2850    }
2851
2852    #[test]
2853    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
2854        let messages = vec![ApiMessage {
2855            role: ApiRole::User,
2856            content: Some("Hello".to_string()),
2857            reasoning_content: None,
2858            tool_calls: None,
2859            tool_call_id: None,
2860        }];
2861
2862        let request = ApiChatRequest {
2863            model: "gpt-4o",
2864            messages: &messages,
2865            max_completion_tokens: Some(1024),
2866            max_tokens: None,
2867            tools: None,
2868            tool_choice: None,
2869            reasoning: None,
2870            response_format: None,
2871        };
2872
2873        let json = serde_json::to_string(&request).unwrap();
2874        assert!(json.contains("\"max_completion_tokens\":1024"));
2875        assert!(!json.contains("\"max_tokens\""));
2876    }
2877
2878    #[test]
2879    fn test_request_serialization_with_max_tokens_alias() {
2880        let messages = vec![ApiMessage {
2881            role: ApiRole::User,
2882            content: Some("Hello".to_string()),
2883            reasoning_content: None,
2884            tool_calls: None,
2885            tool_call_id: None,
2886        }];
2887
2888        let request = ApiChatRequest {
2889            model: "glm-5",
2890            messages: &messages,
2891            max_completion_tokens: Some(1024),
2892            max_tokens: Some(1024),
2893            tools: None,
2894            tool_choice: None,
2895            reasoning: None,
2896            response_format: None,
2897        };
2898
2899        let json = serde_json::to_string(&request).unwrap();
2900        assert!(json.contains("\"max_completion_tokens\":1024"));
2901        assert!(json.contains("\"max_tokens\":1024"));
2902    }
2903
2904    #[test]
2905    fn test_streaming_request_serialization_openai_default() {
2906        let messages = vec![ApiMessage {
2907            role: ApiRole::User,
2908            content: Some("Hello".to_string()),
2909            reasoning_content: None,
2910            tool_calls: None,
2911            tool_call_id: None,
2912        }];
2913
2914        let request = ApiChatRequestStreaming {
2915            model: "gpt-4o",
2916            messages: &messages,
2917            max_completion_tokens: Some(1024),
2918            max_tokens: None,
2919            tools: None,
2920            tool_choice: None,
2921            reasoning: None,
2922            response_format: None,
2923            stream_options: Some(ApiStreamOptions {
2924                include_usage: true,
2925            }),
2926            usage: None,
2927            stream: true,
2928        };
2929
2930        let json = serde_json::to_string(&request).unwrap();
2931        assert!(json.contains("\"stream\":true"));
2932        assert!(json.contains("\"model\":\"gpt-4o\""));
2933        assert!(json.contains("\"max_completion_tokens\":1024"));
2934        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2935        assert!(!json.contains("\"max_tokens\""));
2936    }
2937
2938    #[test]
2939    fn stream_usage_is_requested_for_every_endpoint() {
2940        // issue #302: usage must be requested on ALL OpenAI-compatible
2941        // endpoints, not just api.openai.com, so OpenRouter/Baseten/local
2942        // turns report token usage to cost ledgers and budgets.
2943        assert!(use_stream_usage_options("https://api.openai.com/v1"));
2944        assert!(use_stream_usage_options("https://openrouter.ai/api/v1"));
2945        assert!(use_stream_usage_options("https://host.baseten.co/v1"));
2946        assert!(use_stream_usage_options("http://localhost:1234/v1"));
2947    }
2948
2949    #[test]
2950    fn openrouter_usage_flag_only_for_openrouter() {
2951        assert!(use_openrouter_usage_options("https://openrouter.ai/api/v1"));
2952        assert!(!use_openrouter_usage_options("https://api.openai.com/v1"));
2953    }
2954
2955    #[test]
2956    fn streaming_request_serializes_openrouter_usage_flag() -> anyhow::Result<()> {
2957        let messages = vec![ApiMessage {
2958            role: ApiRole::User,
2959            content: Some("hi".to_string()),
2960            reasoning_content: None,
2961            tool_calls: None,
2962            tool_call_id: None,
2963        }];
2964        let request = ApiChatRequestStreaming {
2965            model: "anthropic/claude-3.5",
2966            messages: &messages,
2967            max_completion_tokens: Some(16),
2968            max_tokens: None,
2969            tools: None,
2970            tool_choice: None,
2971            reasoning: None,
2972            response_format: None,
2973            stream_options: Some(ApiStreamOptions {
2974                include_usage: true,
2975            }),
2976            usage: Some(ApiOpenRouterUsageOptions { include: true }),
2977            stream: true,
2978        };
2979        let json = serde_json::to_string(&request)?;
2980        assert!(json.contains("\"usage\":{\"include\":true}"));
2981        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2982        Ok(())
2983    }
2984
2985    #[test]
2986    fn usage_only_chunk_without_choices_deserializes() -> anyhow::Result<()> {
2987        // OpenAI's trailing usage frame (and some OpenRouter frames) omit
2988        // `choices` entirely; the chunk must still deserialize so the usage is
2989        // captured instead of being silently dropped (issue #302).
2990        let no_choices: SseChunk = serde_json::from_str("{}")?;
2991        assert!(no_choices.choices.is_empty());
2992
2993        let usage_only: SseChunk =
2994            serde_json::from_str(r#"{"usage":{"prompt_tokens":10,"completion_tokens":5}}"#)?;
2995        assert!(usage_only.choices.is_empty());
2996        assert!(usage_only.usage.is_some());
2997        Ok(())
2998    }
2999
3000    #[test]
3001    fn test_streaming_request_serialization_with_max_tokens_alias() {
3002        let messages = vec![ApiMessage {
3003            role: ApiRole::User,
3004            content: Some("Hello".to_string()),
3005            reasoning_content: None,
3006            tool_calls: None,
3007            tool_call_id: None,
3008        }];
3009
3010        let request = ApiChatRequestStreaming {
3011            model: "kimi-k2-thinking",
3012            messages: &messages,
3013            max_completion_tokens: Some(1024),
3014            max_tokens: Some(1024),
3015            tools: None,
3016            tool_choice: None,
3017            reasoning: None,
3018            response_format: None,
3019            stream_options: None,
3020            usage: None,
3021            stream: true,
3022        };
3023
3024        let json = serde_json::to_string(&request).unwrap();
3025        assert!(json.contains("\"max_completion_tokens\":1024"));
3026        assert!(json.contains("\"max_tokens\":1024"));
3027        assert!(!json.contains("\"stream_options\""));
3028    }
3029
3030    #[test]
3031    fn test_request_serialization_includes_reasoning_when_present() {
3032        let messages = vec![ApiMessage {
3033            role: ApiRole::User,
3034            content: Some("Hello".to_string()),
3035            reasoning_content: None,
3036            tool_calls: None,
3037            tool_call_id: None,
3038        }];
3039
3040        let request = ApiChatRequest {
3041            model: MODEL_GPT54,
3042            messages: &messages,
3043            max_completion_tokens: Some(1024),
3044            max_tokens: None,
3045            tools: None,
3046            tool_choice: None,
3047            reasoning: Some(ApiReasoning {
3048                effort: ReasoningEffort::High,
3049            }),
3050            response_format: None,
3051        };
3052
3053        let json = serde_json::to_string(&request).unwrap();
3054        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
3055    }
3056
3057    #[test]
3058    fn test_response_format_serializes_as_json_schema() {
3059        let messages = vec![ApiMessage {
3060            role: ApiRole::User,
3061            content: Some("Hello".to_string()),
3062            reasoning_content: None,
3063            tool_calls: None,
3064            tool_call_id: None,
3065        }];
3066
3067        let response_format = Some(ApiResponseFormat::from_response_format(
3068            &agent_sdk_foundation::llm::ResponseFormat::new(
3069                "person",
3070                serde_json::json!({"type": "object"}),
3071            ),
3072        ));
3073
3074        let request = ApiChatRequest {
3075            model: "gpt-4o",
3076            messages: &messages,
3077            max_completion_tokens: Some(1024),
3078            max_tokens: None,
3079            tools: None,
3080            tool_choice: None,
3081            reasoning: None,
3082            response_format,
3083        };
3084
3085        let json = serde_json::to_value(&request).unwrap();
3086        assert_eq!(json["response_format"]["type"], "json_schema");
3087        assert_eq!(json["response_format"]["json_schema"]["name"], "person");
3088        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
3089        assert_eq!(
3090            json["response_format"]["json_schema"]["schema"]["type"],
3091            "object"
3092        );
3093    }
3094
3095    #[test]
3096    fn test_step_completion_stream_emits_trailing_usage_after_finish_reason() {
3097        // Official OpenAI with stream_options.include_usage sends the usage in a
3098        // SEPARATE chunk (choices: []) AFTER the finish_reason chunk, then [DONE].
3099        // The streaming loop must keep consuming past finish_reason so that usage
3100        // is captured and emitted (previously it returned early on Done, dropping
3101        // the usage entirely).
3102        let mut tool_calls: HashMap<usize, ToolCallAccumulator> = HashMap::new();
3103        let mut usage: Option<Usage> = None;
3104        let mut stop_reason: Option<StopReason> = None;
3105
3106        // Chunk 1: text delta + finish_reason — must NOT finalize.
3107        let o1 = step_completion_stream(
3108            r#"{"choices":[{"delta":{"content":"hi"},"finish_reason":"stop"}]}"#,
3109            &mut tool_calls,
3110            &mut usage,
3111            &mut stop_reason,
3112        );
3113        assert!(o1.terminal.is_none());
3114        assert!(matches!(stop_reason, Some(StopReason::EndTurn)));
3115
3116        // Chunk 2: usage-only trailing chunk (choices: []).
3117        let o2 = step_completion_stream(
3118            r#"{"choices":[],"usage":{"prompt_tokens":10,"completion_tokens":5}}"#,
3119            &mut tool_calls,
3120            &mut usage,
3121            &mut stop_reason,
3122        );
3123        assert!(o2.terminal.is_none());
3124
3125        // Chunk 3: [DONE] sentinel finalizes and must carry the trailing usage.
3126        let o3 = step_completion_stream("[DONE]", &mut tool_calls, &mut usage, &mut stop_reason);
3127        let terminal = o3.terminal.expect("[DONE] finalizes the stream");
3128        assert!(terminal.iter().any(|d| matches!(
3129            d,
3130            StreamDelta::Usage(Usage {
3131                input_tokens: 10,
3132                output_tokens: 5,
3133                ..
3134            })
3135        )));
3136        assert!(terminal.iter().any(|d| matches!(
3137            d,
3138            StreamDelta::Done {
3139                stop_reason: Some(StopReason::EndTurn)
3140            }
3141        )));
3142    }
3143
3144    #[test]
3145    fn test_response_format_omitted_when_absent() {
3146        let messages = vec![ApiMessage {
3147            role: ApiRole::User,
3148            content: Some("Hello".to_string()),
3149            reasoning_content: None,
3150            tool_calls: None,
3151            tool_call_id: None,
3152        }];
3153
3154        let request = ApiChatRequest {
3155            model: "gpt-4o",
3156            messages: &messages,
3157            max_completion_tokens: Some(1024),
3158            max_tokens: None,
3159            tools: None,
3160            tool_choice: None,
3161            reasoning: None,
3162            response_format: None,
3163        };
3164
3165        let json = serde_json::to_string(&request).unwrap();
3166        assert!(!json.contains("response_format"));
3167    }
3168}