Skip to main content

agent_sdk_providers/impls/
openai.rs

1//! `OpenAI` API provider implementation.
2//!
3//! This module provides an implementation of `LlmProvider` for the `OpenAI`
4//! Chat Completions API. It also supports `OpenAI`-compatible APIs (Ollama, vLLM, etc.)
5//! via the `with_base_url` constructor.
6//!
7//! # Transparent Responses-API reroute
8//!
9//! Some requests cannot be served by Chat Completions and are transparently
10//! rerouted to the `OpenAI` Responses API
11//! ([`OpenAIResponsesProvider`]). The reroute (`should_use_responses_api`) fires
12//! when:
13//!
14//! - the model only exists on the Responses surface (e.g. `gpt-5.2-codex`), or
15//! - the request carries attachments (images / documents), or
16//! - the request is *agentic* (has tools or tool-use/tool-result blocks) against
17//!   the official `api.openai.com` base URL.
18//!
19//! The reroute forwards the provider's pooled HTTP client and `extra_headers`
20//! (the BYOK / gateway auth mechanism) so a rerouted request keeps connection
21//! reuse and authenticates identically to a non-rerouted one.
22
23use crate::attachments::{request_has_attachments, validate_request_attachments};
24use crate::provider::LlmProvider;
25use crate::streaming::{SseLineBuffer, StreamBox, StreamDelta, StreamErrorKind};
26use agent_sdk_foundation::llm::{
27    ChatOutcome, ChatRequest, ChatResponse, Content, ContentBlock, Effort, StopReason,
28    ThinkingConfig, ThinkingMode, Usage,
29};
30use anyhow::Result;
31use async_trait::async_trait;
32use futures::StreamExt;
33use reqwest::StatusCode;
34use serde::de::Error as _;
35use serde::{Deserialize, Serialize};
36use std::collections::HashMap;
37
38use super::openai_responses::OpenAIResponsesProvider;
39
/// Base URL used by `OpenAIProvider::new`; also compared against in
/// `is_official_openai_base_url` when deciding on the Responses-API reroute.
const DEFAULT_BASE_URL: &str = "https://api.openai.com/v1";
41
42/// Build an HTTP client with connect/keepalive timeouts matching the sibling
43/// providers (`anthropic`, `vertex`). A bare `reqwest::Client::new()` has no
44/// connect timeout, so a black-holed connect would wedge `chat`/`chat_stream`
45/// forever.
46fn build_http_client() -> reqwest::Client {
47    reqwest::Client::builder()
48        .connect_timeout(std::time::Duration::from_secs(30))
49        .tcp_keepalive(std::time::Duration::from_secs(30))
50        .build()
51        .unwrap_or_default()
52}
53
/// Report whether `model` must be served by the Responses API rather than
/// Chat Completions. Currently only `MODEL_GPT52_CODEX` qualifies.
fn requires_responses_api(model: &str) -> bool {
    matches!(model, MODEL_GPT52_CODEX)
}
58
59fn is_official_openai_base_url(base_url: &str) -> bool {
60    base_url == DEFAULT_BASE_URL || base_url.contains("api.openai.com")
61}
62
63fn request_is_agentic(request: &ChatRequest) -> bool {
64    request
65        .tools
66        .as_ref()
67        .is_some_and(|tools| !tools.is_empty()) || request.messages.iter().any(|message| {
68        matches!(
69            &message.content,
70            Content::Blocks(blocks)
71                if blocks.iter().any(|block| {
72                    matches!(block, ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. })
73                })
74        )
75    })
76}
77
78fn should_use_responses_api(base_url: &str, model: &str, request: &ChatRequest) -> bool {
79    requires_responses_api(model)
80        || request_has_attachments(request)
81        || (is_official_openai_base_url(base_url) && request_is_agentic(request))
82}
83
// GPT-5.4 series
/// Model identifier for GPT-5.4.
pub const MODEL_GPT54: &str = "gpt-5.4";

// GPT-5.3 Codex series
/// Model identifier for GPT-5.3 Codex.
pub const MODEL_GPT53_CODEX: &str = "gpt-5.3-codex";

// GPT-5.2 series
/// Model identifier for GPT-5.2 Instant.
pub const MODEL_GPT52_INSTANT: &str = "gpt-5.2-instant";
/// Model identifier for GPT-5.2 Thinking.
pub const MODEL_GPT52_THINKING: &str = "gpt-5.2-thinking";
/// Model identifier for GPT-5.2 Pro.
pub const MODEL_GPT52_PRO: &str = "gpt-5.2-pro";
/// Model identifier for GPT-5.2 Codex. `requires_responses_api` returns `true`
/// for this model, so it is always rerouted to the Responses API.
pub const MODEL_GPT52_CODEX: &str = "gpt-5.2-codex";

// GPT-5 series (400k context)
/// Model identifier for GPT-5.
pub const MODEL_GPT5: &str = "gpt-5";
/// Model identifier for GPT-5-mini.
pub const MODEL_GPT5_MINI: &str = "gpt-5-mini";
/// Model identifier for GPT-5-nano.
pub const MODEL_GPT5_NANO: &str = "gpt-5-nano";

// o-series reasoning models
/// Model identifier for o3.
pub const MODEL_O3: &str = "o3";
/// Model identifier for o3-mini.
pub const MODEL_O3_MINI: &str = "o3-mini";
/// Model identifier for o4-mini.
pub const MODEL_O4_MINI: &str = "o4-mini";
/// Model identifier for o1.
pub const MODEL_O1: &str = "o1";
/// Model identifier for o1-mini.
pub const MODEL_O1_MINI: &str = "o1-mini";

// GPT-4.1 series (improved instruction following, 1M context)
/// Model identifier for GPT-4.1.
pub const MODEL_GPT41: &str = "gpt-4.1";
/// Model identifier for GPT-4.1-mini.
pub const MODEL_GPT41_MINI: &str = "gpt-4.1-mini";
/// Model identifier for GPT-4.1-nano.
pub const MODEL_GPT41_NANO: &str = "gpt-4.1-nano";

// GPT-4o series
/// Model identifier for GPT-4o.
pub const MODEL_GPT4O: &str = "gpt-4o";
/// Model identifier for GPT-4o-mini.
pub const MODEL_GPT4O_MINI: &str = "gpt-4o-mini";

// OpenAI-compatible vendor defaults
/// Base URL used by the `kimi` constructors (Moonshot).
pub const BASE_URL_KIMI: &str = "https://api.moonshot.ai/v1";
/// Base URL used by the `zai` constructors (z.ai).
pub const BASE_URL_ZAI: &str = "https://api.z.ai/api/paas/v4";
/// Base URL used by the `minimax` constructors (`MiniMax`).
pub const BASE_URL_MINIMAX: &str = "https://api.minimax.io/v1";
/// Model identifier for KIMI K2.5 (default KIMI model).
pub const MODEL_KIMI_K2_5: &str = "kimi-k2.5";
/// Model identifier for KIMI K2 Thinking.
pub const MODEL_KIMI_K2_THINKING: &str = "kimi-k2-thinking";
/// Model identifier for z.ai GLM-5.
pub const MODEL_ZAI_GLM5: &str = "glm-5";
/// Model identifier for `MiniMax` M2.5.
pub const MODEL_MINIMAX_M2_5: &str = "MiniMax-M2.5";
125
/// `OpenAI` LLM provider using the Chat Completions API.
///
/// Also supports `OpenAI`-compatible APIs (Ollama, vLLM, Azure `OpenAI`, etc.)
/// via the `with_base_url` constructor.
#[derive(Clone)]
pub struct OpenAIProvider {
    /// HTTP client used for every request; it is also cloned into the
    /// Responses-API reroute provider.
    client: reqwest::Client,
    /// API key sent as a `Bearer` token. When empty, no `Authorization` header
    /// is added (see `apply_headers`).
    api_key: String,
    /// Model identifier sent with every request.
    model: String,
    /// API root; `/chat/completions` is appended to it for each request.
    base_url: String,
    /// Provider-level thinking configuration, set via `with_thinking`.
    thinking: Option<ThinkingConfig>,
    /// Extra headers applied to every request (e.g. for gateway authentication).
    extra_headers: Vec<(String, String)>,
}
140
141impl OpenAIProvider {
142    /// The conventional environment variable holding the `OpenAI` API key.
143    pub const API_KEY_ENV: &'static str = "OPENAI_API_KEY";
144
145    /// Create a new `OpenAI` provider with the specified API key and model.
146    #[must_use]
147    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
148        Self {
149            client: build_http_client(),
150            api_key: api_key.into(),
151            model: model.into(),
152            base_url: DEFAULT_BASE_URL.to_owned(),
153            thinking: None,
154            extra_headers: Vec::new(),
155        }
156    }
157
158    /// Create a provider using GPT-5, reading the API key from the
159    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
160    ///
161    /// # Panics
162    ///
163    /// Panics if `OPENAI_API_KEY` is not set. Prefer
164    /// [`try_from_env`](Self::try_from_env) outside of examples/tests.
165    #[must_use]
166    pub fn from_env() -> Self {
167        Self::try_from_env().unwrap_or_else(|e| panic!("{e}"))
168    }
169
170    /// Create a provider using GPT-5, reading the API key from the
171    /// conventional [`OPENAI_API_KEY`](Self::API_KEY_ENV) environment variable.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if `OPENAI_API_KEY` is unset or not valid UTF-8.
176    pub fn try_from_env() -> Result<Self> {
177        let api_key = std::env::var(Self::API_KEY_ENV).map_err(|_| {
178            anyhow::anyhow!("environment variable `{}` is not set", Self::API_KEY_ENV)
179        })?;
180        Ok(Self::gpt5(api_key))
181    }
182
183    /// Create a new provider with a custom base URL for OpenAI-compatible APIs.
184    #[must_use]
185    pub fn with_base_url(
186        api_key: impl Into<String>,
187        model: impl Into<String>,
188        base_url: impl Into<String>,
189    ) -> Self {
190        Self {
191            client: build_http_client(),
192            api_key: api_key.into(),
193            model: model.into(),
194            base_url: base_url.into(),
195            thinking: None,
196            extra_headers: Vec::new(),
197        }
198    }
199
200    /// Create a provider using Moonshot KIMI via OpenAI-compatible Chat Completions.
201    #[must_use]
202    pub fn kimi(api_key: String, model: String) -> Self {
203        Self::with_base_url(api_key, model, BASE_URL_KIMI.to_owned())
204    }
205
206    /// Create a provider using KIMI K2.5 (default KIMI model).
207    #[must_use]
208    pub fn kimi_k2_5(api_key: String) -> Self {
209        Self::kimi(api_key, MODEL_KIMI_K2_5.to_owned())
210    }
211
212    /// Create a provider using KIMI K2 Thinking.
213    #[must_use]
214    pub fn kimi_k2_thinking(api_key: String) -> Self {
215        Self::kimi(api_key, MODEL_KIMI_K2_THINKING.to_owned())
216    }
217
218    /// Create a provider using z.ai via OpenAI-compatible Chat Completions.
219    #[must_use]
220    pub fn zai(api_key: String, model: String) -> Self {
221        Self::with_base_url(api_key, model, BASE_URL_ZAI.to_owned())
222    }
223
224    /// Create a provider using z.ai GLM-5 (default z.ai agentic reasoning model).
225    #[must_use]
226    pub fn zai_glm5(api_key: String) -> Self {
227        Self::zai(api_key, MODEL_ZAI_GLM5.to_owned())
228    }
229
230    /// Create a provider using `MiniMax` via OpenAI-compatible Chat Completions.
231    #[must_use]
232    pub fn minimax(api_key: String, model: String) -> Self {
233        Self::with_base_url(api_key, model, BASE_URL_MINIMAX.to_owned())
234    }
235
236    /// Create a provider using `MiniMax` M2.5 (default `MiniMax` model).
237    #[must_use]
238    pub fn minimax_m2_5(api_key: String) -> Self {
239        Self::minimax(api_key, MODEL_MINIMAX_M2_5.to_owned())
240    }
241
242    /// Create a provider using GPT-5.2 Instant (speed-optimized for routine queries).
243    #[must_use]
244    pub fn gpt52_instant(api_key: String) -> Self {
245        Self::new(api_key, MODEL_GPT52_INSTANT.to_owned())
246    }
247
248    /// Create a provider using GPT-5.4 (frontier reasoning with 1.05M context).
249    #[must_use]
250    pub fn gpt54(api_key: String) -> Self {
251        Self::new(api_key, MODEL_GPT54.to_owned())
252    }
253
254    /// Create a provider using GPT-5.3 Codex (latest codex model).
255    #[must_use]
256    pub fn gpt53_codex(api_key: String) -> Self {
257        Self::new(api_key, MODEL_GPT53_CODEX.to_owned())
258    }
259
260    /// Create a provider using GPT-5.2 Thinking (complex reasoning, coding, analysis).
261    #[must_use]
262    pub fn gpt52_thinking(api_key: String) -> Self {
263        Self::new(api_key, MODEL_GPT52_THINKING.to_owned())
264    }
265
266    /// Create a provider using GPT-5.2 Pro (maximum accuracy for difficult problems).
267    #[must_use]
268    pub fn gpt52_pro(api_key: String) -> Self {
269        Self::new(api_key, MODEL_GPT52_PRO.to_owned())
270    }
271
272    /// Create a provider using the latest Codex model.
273    #[must_use]
274    pub fn codex(api_key: String) -> Self {
275        Self::gpt53_codex(api_key)
276    }
277
278    /// Create a provider using GPT-5 (400k context, coding and reasoning).
279    #[must_use]
280    pub fn gpt5(api_key: String) -> Self {
281        Self::new(api_key, MODEL_GPT5.to_owned())
282    }
283
284    /// Create a provider using GPT-5-mini (faster, cost-efficient GPT-5).
285    #[must_use]
286    pub fn gpt5_mini(api_key: String) -> Self {
287        Self::new(api_key, MODEL_GPT5_MINI.to_owned())
288    }
289
290    /// Create a provider using GPT-5-nano (fastest, cheapest GPT-5 variant).
291    #[must_use]
292    pub fn gpt5_nano(api_key: String) -> Self {
293        Self::new(api_key, MODEL_GPT5_NANO.to_owned())
294    }
295
296    /// Create a provider using o3 (most intelligent reasoning model).
297    #[must_use]
298    pub fn o3(api_key: String) -> Self {
299        Self::new(api_key, MODEL_O3.to_owned())
300    }
301
302    /// Create a provider using o3-mini (smaller o3 variant).
303    #[must_use]
304    pub fn o3_mini(api_key: String) -> Self {
305        Self::new(api_key, MODEL_O3_MINI.to_owned())
306    }
307
308    /// Create a provider using o4-mini (fast, cost-efficient reasoning).
309    #[must_use]
310    pub fn o4_mini(api_key: String) -> Self {
311        Self::new(api_key, MODEL_O4_MINI.to_owned())
312    }
313
314    /// Create a provider using o1 (reasoning model).
315    #[must_use]
316    pub fn o1(api_key: String) -> Self {
317        Self::new(api_key, MODEL_O1.to_owned())
318    }
319
320    /// Create a provider using o1-mini (fast reasoning model).
321    #[must_use]
322    pub fn o1_mini(api_key: String) -> Self {
323        Self::new(api_key, MODEL_O1_MINI.to_owned())
324    }
325
326    /// Create a provider using GPT-4.1 (improved instruction following, 1M context).
327    #[must_use]
328    pub fn gpt41(api_key: String) -> Self {
329        Self::new(api_key, MODEL_GPT41.to_owned())
330    }
331
332    /// Create a provider using GPT-4.1-mini (smaller, faster GPT-4.1).
333    #[must_use]
334    pub fn gpt41_mini(api_key: String) -> Self {
335        Self::new(api_key, MODEL_GPT41_MINI.to_owned())
336    }
337
338    /// Create a provider using GPT-4o.
339    #[must_use]
340    pub fn gpt4o(api_key: String) -> Self {
341        Self::new(api_key, MODEL_GPT4O.to_owned())
342    }
343
344    /// Create a provider using GPT-4o-mini (fast and cost-effective).
345    #[must_use]
346    pub fn gpt4o_mini(api_key: String) -> Self {
347        Self::new(api_key, MODEL_GPT4O_MINI.to_owned())
348    }
349
350    /// Set the provider-owned thinking configuration for this model.
351    #[must_use]
352    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
353        self.thinking = Some(thinking);
354        self
355    }
356
357    /// Add extra HTTP headers applied to every request.
358    #[must_use]
359    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
360        self.extra_headers = headers;
361        self
362    }
363
364    /// Apply auth + extra headers. Skips `Authorization` when `api_key` is
365    /// empty (BYOK gateway mode — auth handled via `extra_headers`).
366    fn apply_headers(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
367        let builder = if self.api_key.is_empty() {
368            builder
369        } else {
370            builder.header("Authorization", format!("Bearer {}", self.api_key))
371        };
372        self.extra_headers
373            .iter()
374            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
375    }
376
377    /// Build the `OpenAIResponsesProvider` used for the transparent Responses-API
378    /// reroute, forwarding this provider's pooled client, thinking config, and
379    /// extra headers so the rerouted request reuses connections and authenticates
380    /// identically (critical for BYOK / gateway setups with an empty `api_key`).
381    fn responses_reroute(&self) -> OpenAIResponsesProvider {
382        let mut provider = OpenAIResponsesProvider::with_base_url(
383            self.api_key.clone(),
384            self.model.clone(),
385            self.base_url.clone(),
386        )
387        .with_client(self.client.clone())
388        .with_extra_headers(self.extra_headers.clone());
389        if let Some(thinking) = self.thinking.clone() {
390            provider = provider.with_thinking(thinking);
391        }
392        provider
393    }
394}
395
396#[async_trait]
397impl LlmProvider for OpenAIProvider {
398    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
399        // Route official OpenAI agentic flows to the Responses API, preserving
400        // the pooled client and extra_headers (BYOK / gateway auth).
401        if should_use_responses_api(&self.base_url, &self.model, &request) {
402            return self.responses_reroute().chat(request).await;
403        }
404
405        let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
406            Ok(thinking) => thinking,
407            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
408        };
409        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
410            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
411        }
412        let reasoning = build_api_reasoning(thinking_config.as_ref());
413        let messages = build_api_messages(&request);
414        let tools: Option<Vec<ApiTool>> = request
415            .tools
416            .map(|ts| ts.into_iter().map(convert_tool).collect());
417        let tool_choice = request
418            .tool_choice
419            .as_ref()
420            .map(ApiToolChoice::from_tool_choice);
421        let response_format = request
422            .response_format
423            .as_ref()
424            .map(ApiResponseFormat::from_response_format);
425
426        let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
427        let api_request = ApiChatRequest {
428            model: &self.model,
429            messages: &messages,
430            max_completion_tokens: Some(request.max_tokens),
431            max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
432            tools: tools.as_deref(),
433            tool_choice,
434            reasoning,
435            response_format,
436        };
437
438        log::debug!(
439            "OpenAI LLM request model={} max_tokens={}",
440            self.model,
441            request.max_tokens
442        );
443
444        let builder = self
445            .client
446            .post(format!("{}/chat/completions", self.base_url))
447            .header("Content-Type", "application/json");
448        let response = self
449            .apply_headers(builder)
450            .json(&api_request)
451            .send()
452            .await
453            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
454
455        let status = response.status();
456        let bytes = response
457            .bytes()
458            .await
459            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
460
461        log::debug!(
462            "OpenAI LLM response status={} body_len={}",
463            status,
464            bytes.len()
465        );
466
467        decode_chat_response(status, &bytes)
468    }
469
470    #[allow(clippy::too_many_lines)]
471    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
472        // Route official OpenAI agentic flows to the Responses API, preserving
473        // the pooled client and extra_headers (BYOK / gateway auth).
474        if should_use_responses_api(&self.base_url, &self.model, &request) {
475            let responses_provider = self.responses_reroute();
476            return Box::pin(async_stream::stream! {
477                let mut stream = std::pin::pin!(responses_provider.chat_stream(request));
478                while let Some(item) = futures::StreamExt::next(&mut stream).await {
479                    yield item;
480                }
481            });
482        }
483
484        Box::pin(async_stream::stream! {
485            let thinking_config = match self.resolve_thinking_config(request.thinking.as_ref()) {
486                Ok(thinking) => thinking,
487                Err(error) => {
488                    yield Ok(StreamDelta::Error {
489                        message: error.to_string(),
490                        kind: StreamErrorKind::InvalidRequest,
491                    });
492                    return;
493                }
494            };
495            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
496                yield Ok(StreamDelta::Error {
497                    message: error.to_string(),
498                    kind: StreamErrorKind::InvalidRequest,
499                });
500                return;
501            }
502            let reasoning = build_api_reasoning(thinking_config.as_ref());
503            let messages = build_api_messages(&request);
504            let tools: Option<Vec<ApiTool>> = request
505                .tools
506                .map(|ts| ts.into_iter().map(convert_tool).collect());
507            let tool_choice = request
508                .tool_choice
509                .as_ref()
510                .map(ApiToolChoice::from_tool_choice);
511            let response_format = request
512                .response_format
513                .as_ref()
514                .map(ApiResponseFormat::from_response_format);
515
516            let include_max_tokens_alias = use_max_tokens_alias(&self.base_url);
517            let include_stream_usage = use_stream_usage_options(&self.base_url);
518            let include_openrouter_usage = use_openrouter_usage_options(&self.base_url);
519            let api_request = ApiChatRequestStreaming {
520                model: &self.model,
521                messages: &messages,
522                max_completion_tokens: Some(request.max_tokens),
523                max_tokens: include_max_tokens_alias.then_some(request.max_tokens),
524                tools: tools.as_deref(),
525                tool_choice,
526                reasoning,
527                response_format,
528                stream_options: include_stream_usage.then_some(ApiStreamOptions {
529                    include_usage: true,
530                }),
531                usage: include_openrouter_usage
532                    .then_some(ApiOpenRouterUsageOptions { include: true }),
533                stream: true,
534            };
535
536            log::debug!("OpenAI streaming LLM request model={} max_tokens={}", self.model, request.max_tokens);
537
538            let stream_builder = self.client
539                .post(format!("{}/chat/completions", self.base_url))
540                .header("Content-Type", "application/json");
541            let Ok(response) = self
542                .apply_headers(stream_builder)
543                .json(&api_request)
544                .send()
545                .await
546            else {
547                yield Err(anyhow::anyhow!("request failed"));
548                return;
549            };
550
551            let status = response.status();
552
553            if !status.is_success() {
554                let body = response.text().await.unwrap_or_default();
555                let (kind, level) = if status == StatusCode::TOO_MANY_REQUESTS {
556                    (StreamErrorKind::RateLimited, "rate_limit")
557                } else if status.is_server_error() {
558                    (StreamErrorKind::ServerError, "server_error")
559                } else {
560                    (StreamErrorKind::InvalidRequest, "client_error")
561                };
562                log::warn!("OpenAI error status={status} body={body} kind={level}");
563                yield Ok(StreamDelta::Error { message: body, kind });
564                return;
565            }
566
567            // Track tool call state across deltas
568            let mut tool_calls: HashMap<usize, ToolCallAccumulator> = HashMap::new();
569            let mut usage: Option<Usage> = None;
570            // The stop reason from `finish_reason`. With stream_options.include_usage
571            // (official OpenAI) the usage arrives in a SEPARATE trailing chunk
572            // (choices: []) AFTER finish_reason and before [DONE], so we record the
573            // stop reason and keep consuming until [DONE] / stream end rather than
574            // returning early and dropping that usage chunk.
575            let mut stop_reason: Option<StopReason> = None;
576            let mut sse = SseLineBuffer::new();
577            let mut stream = response.bytes_stream();
578
579            while let Some(chunk_result) = stream.next().await {
580                let chunk = match chunk_result {
581                    Ok(chunk) => chunk,
582                    Err(error) => {
583                        yield Err(anyhow::anyhow!("stream error: {error}"));
584                        return;
585                    }
586                };
587                sse.extend(&chunk);
588
589                while let Some(line) = sse.next_line() {
590                    let line = line.trim();
591                    if line.is_empty() { continue; }
592                    let Some(data) = line.strip_prefix("data: ") else { continue; };
593
594                    let outcome = step_completion_stream(
595                        data,
596                        &mut tool_calls,
597                        &mut usage,
598                        &mut stop_reason,
599                    );
600                    for delta in outcome.immediate { yield Ok(delta); }
601                    if let Some(terminal) = outcome.terminal {
602                        for delta in terminal { yield Ok(delta); }
603                        return;
604                    }
605                }
606            }
607
608            // Stream ended without [DONE] - emit what we have. Infer the stop
609            // reason from accumulated tool calls (same heuristic as the [DONE]
610            // arm) so a stream that dies mid-tool-call doesn't report EndTurn.
611            let sr = stop_reason.unwrap_or_else(|| fallback_stream_stop_reason(&tool_calls));
612            for delta in build_stream_end_deltas(&tool_calls, usage.take(), sr) {
613                yield Ok(delta);
614            }
615        })
616    }
617
618    fn model(&self) -> &str {
619        &self.model
620    }
621
622    fn provider(&self) -> &'static str {
623        "openai"
624    }
625
626    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
627        self.thinking.as_ref()
628    }
629}
630
631/// Apply a tool call update to the accumulator.
632fn apply_tool_call_update(
633    tool_calls: &mut std::collections::HashMap<usize, ToolCallAccumulator>,
634    index: usize,
635    id: Option<String>,
636    name: Option<String>,
637    arguments: Option<String>,
638) {
639    let entry = tool_calls
640        .entry(index)
641        .or_insert_with(|| ToolCallAccumulator {
642            id: String::new(),
643            name: String::new(),
644            arguments: String::new(),
645        });
646    if let Some(id) = id {
647        entry.id = id;
648    }
649    if let Some(name) = name {
650        entry.name = name;
651    }
652    if let Some(args) = arguments {
653        entry.arguments.push_str(&args);
654    }
655}
656
/// Immediate + terminal deltas produced by feeding one SSE `data:` line to the
/// Chat Completions streaming state.
///
/// Returned by `step_completion_stream`. The streaming loop yields every
/// `immediate` delta first and then, when `terminal` is `Some`, yields those
/// deltas and ends the stream.
struct SseLineOutcome {
    /// Deltas to yield immediately (text / thinking).
    immediate: Vec<StreamDelta>,
    /// When `Some`, the stream finished ([DONE] received): yield these terminal
    /// deltas (tool calls + usage + Done) and stop.
    terminal: Option<Vec<StreamDelta>>,
}
666
667/// Feed one SSE `data:` payload to the streaming state, accumulating tool calls,
668/// usage, and the stop reason.
669///
670/// Text/thinking deltas are returned for immediate emission. A `finish_reason`
671/// only records the stop reason (it does NOT finalize) so a trailing usage-only
672/// chunk that official `OpenAI` sends after `finish_reason` is still folded in;
673/// finalization happens on the `[DONE]` sentinel.
674fn step_completion_stream(
675    data: &str,
676    tool_calls: &mut HashMap<usize, ToolCallAccumulator>,
677    usage: &mut Option<Usage>,
678    stop_reason: &mut Option<StopReason>,
679) -> SseLineOutcome {
680    let mut immediate = Vec::new();
681    for result in process_sse_data(data) {
682        match result {
683            SseProcessResult::TextDelta(c) => {
684                immediate.push(StreamDelta::TextDelta {
685                    delta: c,
686                    block_index: 0,
687                });
688            }
689            SseProcessResult::ThinkingDelta(c) => {
690                immediate.push(StreamDelta::ThinkingDelta {
691                    delta: c,
692                    block_index: 0,
693                });
694            }
695            SseProcessResult::ToolCallUpdate {
696                index,
697                id,
698                name,
699                arguments,
700            } => apply_tool_call_update(tool_calls, index, id, name, arguments),
701            SseProcessResult::Usage(u) => *usage = Some(u),
702            SseProcessResult::Done(sr) => *stop_reason = Some(sr),
703            SseProcessResult::Sentinel => {
704                let sr = stop_reason.unwrap_or_else(|| fallback_stream_stop_reason(tool_calls));
705                let terminal = build_stream_end_deltas(tool_calls, usage.take(), sr);
706                return SseLineOutcome {
707                    immediate,
708                    terminal: Some(terminal),
709                };
710            }
711        }
712    }
713    SseLineOutcome {
714        immediate,
715        terminal: None,
716    }
717}
718
719/// Helper to emit tool call deltas and done event.
720fn build_stream_end_deltas(
721    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
722    usage: Option<Usage>,
723    stop_reason: StopReason,
724) -> Vec<StreamDelta> {
725    let mut deltas = Vec::new();
726
727    // Emit tool calls. `idx` comes from the wire `tool_calls[].index`; use
728    // saturating_add so a hostile `usize::MAX` index cannot overflow-panic in
729    // debug builds. StreamAccumulator sorts by index so order stays stable.
730    for (idx, tool) in tool_calls {
731        let block_index = idx.saturating_add(1);
732        deltas.push(StreamDelta::ToolUseStart {
733            id: tool.id.clone(),
734            name: tool.name.clone(),
735            block_index,
736            thought_signature: None,
737        });
738        deltas.push(StreamDelta::ToolInputDelta {
739            id: tool.id.clone(),
740            delta: tool.arguments.clone(),
741            block_index,
742        });
743    }
744
745    // Emit usage
746    if let Some(u) = usage {
747        deltas.push(StreamDelta::Usage(u));
748    }
749
750    // Emit done
751    deltas.push(StreamDelta::Done {
752        stop_reason: Some(stop_reason),
753    });
754
755    deltas
756}
757
/// Result of processing an SSE chunk.
///
/// Produced by `process_sse_data` and consumed by `step_completion_stream`,
/// which either emits the value immediately or folds it into stream state.
enum SseProcessResult {
    /// Emit a text delta.
    TextDelta(String),
    /// Emit a thinking/reasoning delta (reasoning-model fallback when the model
    /// streams its output via `reasoning_content`/`reasoning` and `content` is
    /// empty, mirroring the non-streaming `build_content_blocks` fallback).
    ThinkingDelta(String),
    /// Update tool call accumulator (index, optional id, optional name, optional args).
    ToolCallUpdate {
        /// Wire `tool_calls[].index`; the accumulator key.
        index: usize,
        /// Tool call id, when this fragment carries one.
        id: Option<String>,
        /// Function name, when this fragment carries one.
        name: Option<String>,
        /// Argument text fragment, appended to what has been accumulated.
        arguments: Option<String>,
    },
    /// Usage information.
    Usage(Usage),
    /// A `finish_reason` was seen; carries the mapped stop reason. This does
    /// not end the stream by itself — finalization happens on `Sentinel`.
    Done(StopReason),
    /// Stream sentinel [DONE] was received.
    Sentinel,
}
780
781/// Process an SSE data line and return results to apply.
782fn process_sse_data(data: &str) -> Vec<SseProcessResult> {
783    if data == "[DONE]" {
784        return vec![SseProcessResult::Sentinel];
785    }
786
787    let Ok(chunk) = serde_json::from_str::<SseChunk>(data) else {
788        return vec![];
789    };
790
791    let mut results = Vec::new();
792
793    // Extract usage if present
794    if let Some(u) = chunk.usage {
795        results.push(SseProcessResult::Usage(Usage {
796            input_tokens: u.prompt_tokens,
797            output_tokens: u.completion_tokens,
798            cached_input_tokens: u
799                .prompt_tokens_details
800                .as_ref()
801                .map_or(0, |details| details.cached_tokens),
802            cache_creation_input_tokens: 0,
803        }));
804    }
805
806    // Process choices
807    if let Some(choice) = chunk.choices.into_iter().next() {
808        // Handle text content delta. When `content` is empty/absent but the
809        // model streamed reasoning tokens (DeepSeek-style answer-in-
810        // `reasoning_content`, or `OpenRouter`-normalized `reasoning`), surface
811        // the reasoning as a thinking delta so the usable output is not silently
812        // dropped under streaming. This mirrors the non-streaming
813        // `build_content_blocks` fallback: text content takes precedence and the
814        // reasoning fallback only fires when `content` is empty.
815        if let Some(content) = choice.delta.content
816            && !content.is_empty()
817        {
818            results.push(SseProcessResult::TextDelta(content));
819        } else if let Some(reasoning) = choice
820            .delta
821            .reasoning_content
822            .as_deref()
823            .or(choice.delta.reasoning.as_deref())
824            .filter(|r| !r.is_empty())
825        {
826            results.push(SseProcessResult::ThinkingDelta(reasoning.to_owned()));
827        }
828
829        // Handle tool call deltas
830        if let Some(tc_deltas) = choice.delta.tool_calls {
831            for tc in tc_deltas {
832                results.push(SseProcessResult::ToolCallUpdate {
833                    index: tc.index,
834                    id: tc.id,
835                    name: tc.function.as_ref().and_then(|f| f.name.clone()),
836                    arguments: tc.function.as_ref().and_then(|f| f.arguments.clone()),
837                });
838            }
839        }
840
841        // Check for finish reason
842        if let Some(finish_reason) = choice.finish_reason {
843            results.push(SseProcessResult::Done(map_finish_reason(&finish_reason)));
844        }
845    }
846
847    results
848}
849
/// Returns `true` when `base_url` contains one of the host fragments
/// `moonshot.ai`, `api.z.ai` or `minimax.io`.
///
/// NOTE(review): judging by the name, callers use this to send the token cap
/// under `max_tokens` rather than `max_completion_tokens` — confirm at the
/// call site.
fn use_max_tokens_alias(base_url: &str) -> bool {
    const MAX_TOKENS_HOSTS: [&str; 3] = ["moonshot.ai", "api.z.ai", "minimax.io"];

    MAX_TOKENS_HOSTS
        .iter()
        .any(|&host| base_url.contains(host))
}
855
/// Whether to request `stream_options.include_usage` for a streaming call.
///
/// Always `true`, regardless of `_base_url`: every `OpenAI`-compatible endpoint
/// accepts the option, and asking for it everywhere means `OpenRouter` /
/// `Baseten` / local streams also carry a usage frame, so `total_usage` and
/// downstream cost ledgers are populated (issue #302) rather than only
/// first-party `api.openai.com` turns.
const fn use_stream_usage_options(_base_url: &str) -> bool {
    // The parameter is intentionally unused.
    true
}
863
/// Returns `true` when `base_url` contains `openrouter.ai`.
///
/// `OpenRouter` only emits a usage frame when the request also carries a
/// top-level `usage: { include: true }` flag, which is separate from
/// `stream_options.include_usage`.
fn use_openrouter_usage_options(base_url: &str) -> bool {
    const OPENROUTER_HOST: &str = "openrouter.ai";
    base_url.contains(OPENROUTER_HOST)
}
869
870/// Infer the stream stop reason when the provider never sent an explicit
871/// `finish_reason` (truncated stream / EOF): a turn with accumulated tool
872/// calls is a `ToolUse`, otherwise a plain `EndTurn`.
873fn fallback_stream_stop_reason(
874    tool_calls: &std::collections::HashMap<usize, ToolCallAccumulator>,
875) -> StopReason {
876    if tool_calls.is_empty() {
877        StopReason::EndTurn
878    } else {
879        StopReason::ToolUse
880    }
881}
882
883/// Map an HTTP status + body into a [`ChatOutcome`], parsing the success body
884/// into a [`ChatResponse`].
885fn decode_chat_response(status: StatusCode, bytes: &[u8]) -> Result<ChatOutcome> {
886    if status == StatusCode::TOO_MANY_REQUESTS {
887        return Ok(ChatOutcome::RateLimited);
888    }
889
890    if status.is_server_error() {
891        let body = String::from_utf8_lossy(bytes);
892        log::error!("OpenAI server error status={status} body={body}");
893        return Ok(ChatOutcome::ServerError(body.into_owned()));
894    }
895
896    if status.is_client_error() {
897        let body = String::from_utf8_lossy(bytes);
898        log::warn!("OpenAI client error status={status} body={body}");
899        return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
900    }
901
902    let api_response: ApiChatResponse = serde_json::from_slice(bytes)
903        .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
904
905    let choice = api_response
906        .choices
907        .into_iter()
908        .next()
909        .ok_or_else(|| anyhow::anyhow!("no choices in response"))?;
910
911    let content = build_content_blocks(&choice.message);
912    let stop_reason = choice.finish_reason.as_deref().map(map_finish_reason);
913
914    Ok(ChatOutcome::Success(ChatResponse {
915        id: api_response.id,
916        content,
917        model: api_response.model,
918        stop_reason,
919        usage: Usage {
920            input_tokens: api_response.usage.prompt_tokens,
921            output_tokens: api_response.usage.completion_tokens,
922            cached_input_tokens: api_response
923                .usage
924                .prompt_tokens_details
925                .as_ref()
926                .map_or(0, |details| details.cached_tokens),
927            cache_creation_input_tokens: 0,
928        },
929    }))
930}
931
932fn map_finish_reason(finish_reason: &str) -> StopReason {
933    match finish_reason {
934        "stop" => StopReason::EndTurn,
935        "tool_calls" => StopReason::ToolUse,
936        "length" => StopReason::MaxTokens,
937        "content_filter" | "network_error" => StopReason::StopSequence,
938        "sensitive" => StopReason::Refusal,
939        unknown => {
940            log::debug!("Unknown finish_reason from OpenAI-compatible API: {unknown}");
941            StopReason::StopSequence
942        }
943    }
944}
945
946fn build_api_reasoning(thinking: Option<&ThinkingConfig>) -> Option<ApiReasoning> {
947    thinking
948        .and_then(resolve_reasoning_effort)
949        .map(|effort| ApiReasoning { effort })
950}
951
952const fn resolve_reasoning_effort(config: &ThinkingConfig) -> Option<ReasoningEffort> {
953    if let Some(effort) = config.effort {
954        return Some(map_effort(effort));
955    }
956
957    match &config.mode {
958        ThinkingMode::Adaptive => None,
959        ThinkingMode::Enabled { budget_tokens } => Some(map_budget_to_reasoning(*budget_tokens)),
960    }
961}
962
963const fn map_effort(effort: Effort) -> ReasoningEffort {
964    match effort {
965        Effort::Low => ReasoningEffort::Low,
966        Effort::Medium => ReasoningEffort::Medium,
967        Effort::High => ReasoningEffort::High,
968        Effort::Max => ReasoningEffort::XHigh,
969    }
970}
971
972const fn map_budget_to_reasoning(budget_tokens: u32) -> ReasoningEffort {
973    if budget_tokens <= 4_096 {
974        ReasoningEffort::Low
975    } else if budget_tokens <= 16_384 {
976        ReasoningEffort::Medium
977    } else if budget_tokens <= 32_768 {
978        ReasoningEffort::High
979    } else {
980        ReasoningEffort::XHigh
981    }
982}
983
984const fn api_role(role: agent_sdk_foundation::llm::Role) -> ApiRole {
985    match role {
986        agent_sdk_foundation::llm::Role::User => ApiRole::User,
987        agent_sdk_foundation::llm::Role::Assistant => ApiRole::Assistant,
988    }
989}
990
991/// Convert a `Content::Blocks` message into the `OpenAI` wire messages it maps
992/// to, appending them to `messages`.
993///
994/// Tool results become standalone `tool` messages; text, tool calls and (on
995/// assistant tool-call turns) echoed-back reasoning collapse into a single
996/// message.
997fn append_block_messages(
998    messages: &mut Vec<ApiMessage>,
999    role: agent_sdk_foundation::llm::Role,
1000    blocks: &[ContentBlock],
1001) {
1002    let mut text_parts = Vec::new();
1003    let mut thinking_parts = Vec::new();
1004    let mut tool_calls = Vec::new();
1005
1006    for block in blocks {
1007        match block {
1008            ContentBlock::Text { text } => text_parts.push(text.clone()),
1009            ContentBlock::Thinking { thinking, .. } => {
1010                // DeepSeek-style thinking-mode multi-turn requires the prior
1011                // assistant reasoning_content to be echoed back on a tool-call
1012                // turn or the API 400s. Collected here; only carried into
1013                // reasoning_content below when this turn also has a tool call.
1014                thinking_parts.push(thinking.clone());
1015            }
1016            ContentBlock::RedactedThinking { .. }
1017            | ContentBlock::Image { .. }
1018            | ContentBlock::Document { .. } => {
1019                // These blocks are not sent to the OpenAI API
1020            }
1021            ContentBlock::ToolUse {
1022                id, name, input, ..
1023            } => {
1024                tool_calls.push(ApiToolCall {
1025                    id: id.clone(),
1026                    r#type: "function".to_owned(),
1027                    function: ApiFunctionCall {
1028                        name: name.clone(),
1029                        arguments: serde_json::to_string(input).unwrap_or_else(|_| "{}".to_owned()),
1030                    },
1031                });
1032            }
1033            ContentBlock::ToolResult {
1034                tool_use_id,
1035                content,
1036                ..
1037            } => {
1038                // Tool results are separate messages in OpenAI
1039                messages.push(ApiMessage {
1040                    role: ApiRole::Tool,
1041                    content: Some(content.clone()),
1042                    reasoning_content: None,
1043                    tool_calls: None,
1044                    tool_call_id: Some(tool_use_id.clone()),
1045                });
1046            }
1047            // `ContentBlock` is `#[non_exhaustive]`; a block kind this SDK
1048            // version cannot represent is not sent to OpenAI.
1049            _ => log::warn!("Skipping unrecognized OpenAI content block"),
1050        }
1051    }
1052
1053    let role = api_role(role);
1054
1055    // reasoning_content is only echoed back on an assistant turn that ALSO
1056    // carries a tool call — the one case DeepSeek's thinking-mode protocol
1057    // requires it. Per that protocol legacy `deepseek-reasoner` 400s if
1058    // reasoning_content appears in input at all, and DeepSeek V4 thinking-mode
1059    // only needs it on tool-call turns. So a plain reasoning-only assistant
1060    // turn (no tool call) does NOT carry reasoning_content, and it is never
1061    // attached to user messages.
1062    let reasoning_content =
1063        if role == ApiRole::Assistant && !thinking_parts.is_empty() && !tool_calls.is_empty() {
1064            Some(thinking_parts.join("\n"))
1065        } else {
1066            None
1067        };
1068
1069    // Add the message when it carries text, tool calls, or (for an assistant
1070    // turn) reasoning to echo back. Only emit if it's an assistant message or
1071    // has text content.
1072    let has_payload =
1073        !text_parts.is_empty() || !tool_calls.is_empty() || reasoning_content.is_some();
1074    if has_payload && (role == ApiRole::Assistant || !text_parts.is_empty()) {
1075        messages.push(ApiMessage {
1076            role,
1077            content: if text_parts.is_empty() {
1078                None
1079            } else {
1080                Some(text_parts.join("\n"))
1081            },
1082            reasoning_content,
1083            tool_calls: if tool_calls.is_empty() {
1084                None
1085            } else {
1086                Some(tool_calls)
1087            },
1088            tool_call_id: None,
1089        });
1090    }
1091}
1092
1093fn build_api_messages(request: &ChatRequest) -> Vec<ApiMessage> {
1094    let mut messages = Vec::new();
1095
1096    // Add system message first (OpenAI uses a separate message for system prompt)
1097    if !request.system.is_empty() {
1098        messages.push(ApiMessage {
1099            role: ApiRole::System,
1100            content: Some(request.system.clone()),
1101            reasoning_content: None,
1102            tool_calls: None,
1103            tool_call_id: None,
1104        });
1105    }
1106
1107    // Convert SDK messages to OpenAI format
1108    for msg in &request.messages {
1109        match &msg.content {
1110            Content::Text(text) => {
1111                messages.push(ApiMessage {
1112                    role: api_role(msg.role),
1113                    content: Some(text.clone()),
1114                    reasoning_content: None,
1115                    tool_calls: None,
1116                    tool_call_id: None,
1117                });
1118            }
1119            Content::Blocks(blocks) => append_block_messages(&mut messages, msg.role, blocks),
1120        }
1121    }
1122
1123    messages
1124}
1125
1126fn convert_tool(t: agent_sdk_foundation::llm::Tool) -> ApiTool {
1127    ApiTool {
1128        r#type: "function".to_owned(),
1129        function: ApiFunction {
1130            name: t.name,
1131            description: t.description,
1132            parameters: t.input_schema,
1133        },
1134    }
1135}
1136
1137/// Non-empty reasoning text from an `OpenAI`-compatible response message, if any.
1138///
1139/// Prefers `DeepSeek`-style `reasoning_content`, falling back to the `reasoning`
1140/// field used by some `OpenRouter` upstreams.
1141fn reasoning_text(message: &ApiResponseMessage) -> Option<&str> {
1142    message
1143        .reasoning_content
1144        .as_deref()
1145        .or(message.reasoning.as_deref())
1146        .filter(|r| !r.is_empty())
1147}
1148
1149fn build_content_blocks(message: &ApiResponseMessage) -> Vec<ContentBlock> {
1150    let mut blocks = Vec::new();
1151
1152    // Add text content if present
1153    if let Some(content) = &message.content
1154        && !content.is_empty()
1155    {
1156        blocks.push(ContentBlock::Text {
1157            text: content.clone(),
1158        });
1159    } else if let Some(reasoning) = reasoning_text(message) {
1160        // Reasoning-model fallback: when `content` is empty/absent but the model
1161        // produced reasoning tokens (DeepSeek-style answer-in-`reasoning_content`,
1162        // or any reasoning model truncated under a tight `max_tokens` before it
1163        // emitted visible content), surface the reasoning as a Thinking block so
1164        // the usable output is not silently dropped. This is a fallback only —
1165        // when `content` is present the reasoning is left untouched.
1166        blocks.push(ContentBlock::Thinking {
1167            thinking: reasoning.to_owned(),
1168            signature: None,
1169        });
1170    }
1171
1172    // Add tool calls if present
1173    if let Some(tool_calls) = &message.tool_calls {
1174        for tc in tool_calls {
1175            let input: serde_json::Value = serde_json::from_str(&tc.function.arguments)
1176                .unwrap_or_else(|_| serde_json::json!({}));
1177            blocks.push(ContentBlock::ToolUse {
1178                id: tc.id.clone(),
1179                name: tc.function.name.clone(),
1180                input,
1181                thought_signature: None,
1182            });
1183        }
1184    }
1185
1186    blocks
1187}
1188
1189// ============================================================================
1190// API Request Types
1191// ============================================================================
1192
1193#[derive(Serialize)]
1194struct ApiChatRequest<'a> {
1195    model: &'a str,
1196    messages: &'a [ApiMessage],
1197    #[serde(skip_serializing_if = "Option::is_none")]
1198    max_completion_tokens: Option<u32>,
1199    #[serde(skip_serializing_if = "Option::is_none")]
1200    max_tokens: Option<u32>,
1201    #[serde(skip_serializing_if = "Option::is_none")]
1202    tools: Option<&'a [ApiTool]>,
1203    #[serde(skip_serializing_if = "Option::is_none")]
1204    tool_choice: Option<ApiToolChoice>,
1205    #[serde(skip_serializing_if = "Option::is_none")]
1206    reasoning: Option<ApiReasoning>,
1207    #[serde(skip_serializing_if = "Option::is_none")]
1208    response_format: Option<ApiResponseFormat>,
1209}
1210
1211#[derive(Serialize)]
1212struct ApiChatRequestStreaming<'a> {
1213    model: &'a str,
1214    messages: &'a [ApiMessage],
1215    #[serde(skip_serializing_if = "Option::is_none")]
1216    max_completion_tokens: Option<u32>,
1217    #[serde(skip_serializing_if = "Option::is_none")]
1218    max_tokens: Option<u32>,
1219    #[serde(skip_serializing_if = "Option::is_none")]
1220    tools: Option<&'a [ApiTool]>,
1221    #[serde(skip_serializing_if = "Option::is_none")]
1222    tool_choice: Option<ApiToolChoice>,
1223    #[serde(skip_serializing_if = "Option::is_none")]
1224    reasoning: Option<ApiReasoning>,
1225    #[serde(skip_serializing_if = "Option::is_none")]
1226    response_format: Option<ApiResponseFormat>,
1227    #[serde(skip_serializing_if = "Option::is_none")]
1228    stream_options: Option<ApiStreamOptions>,
1229    #[serde(skip_serializing_if = "Option::is_none")]
1230    usage: Option<ApiOpenRouterUsageOptions>,
1231    stream: bool,
1232}
1233
1234/// `OpenAI` `tool_choice` wire format.
1235///
1236/// - `"auto"` — model decides.
1237/// - `{"type": "function", "function": {"name": "<name>"}}` — force a specific function.
1238#[derive(Serialize)]
1239#[serde(untagged)]
1240enum ApiToolChoice {
1241    String(String),
1242    Named {
1243        #[serde(rename = "type")]
1244        choice_type: String,
1245        function: ApiToolChoiceFunction,
1246    },
1247}
1248
1249#[derive(Serialize)]
1250struct ApiToolChoiceFunction {
1251    name: String,
1252}
1253
1254impl ApiToolChoice {
1255    fn from_tool_choice(tc: &agent_sdk_foundation::llm::ToolChoice) -> Self {
1256        match tc {
1257            agent_sdk_foundation::llm::ToolChoice::Auto => Self::String("auto".to_owned()),
1258            agent_sdk_foundation::llm::ToolChoice::Tool(name) => Self::Named {
1259                choice_type: "function".to_owned(),
1260                function: ApiToolChoiceFunction { name: name.clone() },
1261            },
1262        }
1263    }
1264}
1265
1266/// `OpenAI` `response_format` wire format for structured outputs.
1267///
1268/// Emits `{"type": "json_schema", "json_schema": {"name", "schema", "strict"}}`.
1269#[derive(Serialize)]
1270struct ApiResponseFormat {
1271    #[serde(rename = "type")]
1272    format_type: &'static str,
1273    json_schema: ApiJsonSchema,
1274}
1275
1276#[derive(Serialize)]
1277struct ApiJsonSchema {
1278    name: String,
1279    schema: serde_json::Value,
1280    strict: bool,
1281}
1282
1283impl ApiResponseFormat {
1284    fn from_response_format(rf: &agent_sdk_foundation::llm::ResponseFormat) -> Self {
1285        Self {
1286            format_type: "json_schema",
1287            json_schema: ApiJsonSchema {
1288                name: rf.name.clone(),
1289                schema: rf.schema.clone(),
1290                strict: rf.strict,
1291            },
1292        }
1293    }
1294}
1295
1296#[derive(Clone, Copy, Serialize)]
1297struct ApiStreamOptions {
1298    include_usage: bool,
1299}
1300
1301/// `OpenRouter`'s top-level usage-accounting flag (`usage: { include: true }`),
1302/// distinct from `stream_options.include_usage`.
1303#[derive(Clone, Copy, Serialize)]
1304struct ApiOpenRouterUsageOptions {
1305    include: bool,
1306}
1307
1308#[derive(Clone, Copy, Serialize)]
1309#[serde(rename_all = "lowercase")]
1310enum ReasoningEffort {
1311    Low,
1312    Medium,
1313    High,
1314    #[serde(rename = "xhigh")]
1315    XHigh,
1316}
1317
1318#[derive(Serialize)]
1319struct ApiReasoning {
1320    effort: ReasoningEffort,
1321}
1322
1323#[derive(Serialize)]
1324struct ApiMessage {
1325    role: ApiRole,
1326    #[serde(skip_serializing_if = "Option::is_none")]
1327    content: Option<String>,
1328    /// `DeepSeek`-style thinking-mode multi-turn requires the prior assistant
1329    /// `reasoning_content` to be echoed back on a tool-call turn or the API
1330    /// rejects it (HTTP 400). Carried back only for assistant turns that had a
1331    /// Thinking block AND a tool call; omitted entirely otherwise (including
1332    /// reasoning-only turns, since legacy `deepseek-reasoner` 400s if
1333    /// `reasoning_content` appears in input) so the normal path is unchanged.
1334    #[serde(skip_serializing_if = "Option::is_none")]
1335    reasoning_content: Option<String>,
1336    #[serde(skip_serializing_if = "Option::is_none")]
1337    tool_calls: Option<Vec<ApiToolCall>>,
1338    #[serde(skip_serializing_if = "Option::is_none")]
1339    tool_call_id: Option<String>,
1340}
1341
1342#[derive(Debug, Serialize, PartialEq, Eq)]
1343#[serde(rename_all = "lowercase")]
1344enum ApiRole {
1345    System,
1346    User,
1347    Assistant,
1348    Tool,
1349}
1350
1351#[derive(Serialize)]
1352struct ApiToolCall {
1353    id: String,
1354    r#type: String,
1355    function: ApiFunctionCall,
1356}
1357
1358#[derive(Serialize)]
1359struct ApiFunctionCall {
1360    name: String,
1361    arguments: String,
1362}
1363
1364#[derive(Serialize)]
1365struct ApiTool {
1366    r#type: String,
1367    function: ApiFunction,
1368}
1369
1370#[derive(Serialize)]
1371struct ApiFunction {
1372    name: String,
1373    description: String,
1374    parameters: serde_json::Value,
1375}
1376
1377// ============================================================================
1378// API Response Types
1379// ============================================================================
1380
1381#[derive(Deserialize)]
1382struct ApiChatResponse {
1383    id: String,
1384    choices: Vec<ApiChoice>,
1385    model: String,
1386    usage: ApiUsage,
1387}
1388
1389#[derive(Deserialize)]
1390struct ApiChoice {
1391    message: ApiResponseMessage,
1392    finish_reason: Option<String>,
1393}
1394
1395#[derive(Deserialize)]
1396struct ApiResponseMessage {
1397    content: Option<String>,
1398    tool_calls: Option<Vec<ApiResponseToolCall>>,
1399    /// `DeepSeek`-style chain-of-thought, returned at the same level as
1400    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
1401    #[serde(default)]
1402    reasoning_content: Option<String>,
1403    /// `OpenRouter` normalizes reasoning under a `reasoning` field for some
1404    /// upstreams; treated as an equivalent fallback to `reasoning_content`.
1405    #[serde(default)]
1406    reasoning: Option<String>,
1407}
1408
1409#[derive(Deserialize)]
1410struct ApiResponseToolCall {
1411    id: String,
1412    function: ApiResponseFunctionCall,
1413}
1414
1415#[derive(Deserialize)]
1416struct ApiResponseFunctionCall {
1417    name: String,
1418    arguments: String,
1419}
1420
1421#[derive(Deserialize)]
1422struct ApiUsage {
1423    #[serde(deserialize_with = "deserialize_u32_from_number")]
1424    prompt_tokens: u32,
1425    #[serde(deserialize_with = "deserialize_u32_from_number")]
1426    completion_tokens: u32,
1427    #[serde(default)]
1428    prompt_tokens_details: Option<ApiPromptTokensDetails>,
1429}
1430
1431#[derive(Deserialize)]
1432struct ApiPromptTokensDetails {
1433    #[serde(default, deserialize_with = "deserialize_u32_from_number")]
1434    cached_tokens: u32,
1435}
1436
1437// ============================================================================
1438// SSE Streaming Types
1439// ============================================================================
1440
/// State of one tool call as it is assembled from successive stream deltas.
struct ToolCallAccumulator {
    /// Identifier of the call.
    id: String,
    /// Name of the function being called.
    name: String,
    /// Argument text collected so far.
    arguments: String,
}
1447
1448/// A single chunk in `OpenAI`'s SSE stream.
1449#[derive(Deserialize)]
1450struct SseChunk {
1451    // A usage-only frame (OpenAI's trailing chunk, OpenRouter, etc.) may omit
1452    // `choices` entirely; without `default` it fails to deserialize and the
1453    // usage frame is dropped silently.
1454    #[serde(default)]
1455    choices: Vec<SseChoice>,
1456    #[serde(default)]
1457    usage: Option<SseUsage>,
1458}
1459
1460#[derive(Deserialize)]
1461struct SseChoice {
1462    delta: SseDelta,
1463    finish_reason: Option<String>,
1464}
1465
1466#[derive(Deserialize)]
1467struct SseDelta {
1468    content: Option<String>,
1469    tool_calls: Option<Vec<SseToolCallDelta>>,
1470    /// `DeepSeek`-style streamed chain-of-thought, returned at the same level as
1471    /// `content` (`DeepSeek` V4 / some `OpenRouter` providers).
1472    #[serde(default)]
1473    reasoning_content: Option<String>,
1474    /// `OpenRouter` normalizes streamed reasoning under a `reasoning` field for
1475    /// some upstreams; treated as an equivalent fallback to `reasoning_content`.
1476    #[serde(default)]
1477    reasoning: Option<String>,
1478}
1479
1480#[derive(Deserialize)]
1481struct SseToolCallDelta {
1482    index: usize,
1483    id: Option<String>,
1484    function: Option<SseFunctionDelta>,
1485}
1486
1487#[derive(Deserialize)]
1488struct SseFunctionDelta {
1489    name: Option<String>,
1490    arguments: Option<String>,
1491}
1492
1493#[derive(Deserialize)]
1494struct SseUsage {
1495    #[serde(deserialize_with = "deserialize_u32_from_number")]
1496    prompt_tokens: u32,
1497    #[serde(deserialize_with = "deserialize_u32_from_number")]
1498    completion_tokens: u32,
1499    #[serde(default)]
1500    prompt_tokens_details: Option<ApiPromptTokensDetails>,
1501}
1502
1503fn deserialize_u32_from_number<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
1504where
1505    D: serde::Deserializer<'de>,
1506{
1507    #[derive(Deserialize)]
1508    #[serde(untagged)]
1509    enum NumberLike {
1510        U64(u64),
1511        F64(f64),
1512    }
1513
1514    match NumberLike::deserialize(deserializer)? {
1515        NumberLike::U64(v) => u32::try_from(v)
1516            .map_err(|_| D::Error::custom(format!("token count out of range for u32: {v}"))),
1517        NumberLike::F64(v) => {
1518            if v.is_finite() && v >= 0.0 && v.fract() == 0.0 && v <= f64::from(u32::MAX) {
1519                v.to_string().parse::<u32>().map_err(|e| {
1520                    D::Error::custom(format!(
1521                        "failed to convert integer-compatible token count {v} to u32: {e}"
1522                    ))
1523                })
1524            } else {
1525                Err(D::Error::custom(format!(
1526                    "token count must be a non-negative integer-compatible number, got {v}"
1527                )))
1528            }
1529        }
1530    }
1531}
1532
1533#[cfg(test)]
1534mod tests {
1535    use super::*;
1536    use anyhow::Context as _;
1537
1538    // ===================
1539    // Constructor Tests
1540    // ===================
1541
1542    #[test]
1543    fn test_new_creates_provider_with_custom_model() {
1544        let provider = OpenAIProvider::new("test-api-key".to_string(), "custom-model".to_string());
1545
1546        assert_eq!(provider.model(), "custom-model");
1547        assert_eq!(provider.provider(), "openai");
1548        assert_eq!(provider.base_url, DEFAULT_BASE_URL);
1549    }
1550
1551    #[test]
1552    fn test_with_base_url_creates_provider_with_custom_url() {
1553        let provider = OpenAIProvider::with_base_url(
1554            "test-api-key".to_string(),
1555            "llama3".to_string(),
1556            "http://localhost:11434/v1".to_string(),
1557        );
1558
1559        assert_eq!(provider.model(), "llama3");
1560        assert_eq!(provider.base_url, "http://localhost:11434/v1");
1561    }
1562
1563    #[test]
1564    fn test_gpt4o_factory_creates_gpt4o_provider() {
1565        let provider = OpenAIProvider::gpt4o("test-api-key".to_string());
1566
1567        assert_eq!(provider.model(), MODEL_GPT4O);
1568        assert_eq!(provider.provider(), "openai");
1569    }
1570
1571    #[test]
1572    fn test_gpt4o_mini_factory_creates_gpt4o_mini_provider() {
1573        let provider = OpenAIProvider::gpt4o_mini("test-api-key".to_string());
1574
1575        assert_eq!(provider.model(), MODEL_GPT4O_MINI);
1576        assert_eq!(provider.provider(), "openai");
1577    }
1578
1579    #[test]
1580    fn test_gpt52_thinking_factory_creates_provider() {
1581        let provider = OpenAIProvider::gpt52_thinking("test-api-key".to_string());
1582
1583        assert_eq!(provider.model(), MODEL_GPT52_THINKING);
1584        assert_eq!(provider.provider(), "openai");
1585    }
1586
1587    #[test]
1588    fn test_gpt54_factory_creates_provider() {
1589        let provider = OpenAIProvider::gpt54("test-api-key".to_string());
1590
1591        assert_eq!(provider.model(), MODEL_GPT54);
1592        assert_eq!(provider.provider(), "openai");
1593    }
1594
1595    #[test]
1596    fn test_gpt53_codex_factory_creates_provider() {
1597        let provider = OpenAIProvider::gpt53_codex("test-api-key".to_string());
1598
1599        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1600        assert_eq!(provider.provider(), "openai");
1601    }
1602
1603    #[test]
1604    fn test_codex_factory_points_to_latest_codex_model() {
1605        let provider = OpenAIProvider::codex("test-api-key".to_string());
1606
1607        assert_eq!(provider.model(), MODEL_GPT53_CODEX);
1608        assert_eq!(provider.provider(), "openai");
1609    }
1610
1611    #[test]
1612    fn test_gpt5_factory_creates_gpt5_provider() {
1613        let provider = OpenAIProvider::gpt5("test-api-key".to_string());
1614
1615        assert_eq!(provider.model(), MODEL_GPT5);
1616        assert_eq!(provider.provider(), "openai");
1617    }
1618
1619    #[test]
1620    fn test_gpt5_mini_factory_creates_provider() {
1621        let provider = OpenAIProvider::gpt5_mini("test-api-key".to_string());
1622
1623        assert_eq!(provider.model(), MODEL_GPT5_MINI);
1624        assert_eq!(provider.provider(), "openai");
1625    }
1626
1627    #[test]
1628    fn test_o3_factory_creates_o3_provider() {
1629        let provider = OpenAIProvider::o3("test-api-key".to_string());
1630
1631        assert_eq!(provider.model(), MODEL_O3);
1632        assert_eq!(provider.provider(), "openai");
1633    }
1634
1635    #[test]
1636    fn test_o4_mini_factory_creates_o4_mini_provider() {
1637        let provider = OpenAIProvider::o4_mini("test-api-key".to_string());
1638
1639        assert_eq!(provider.model(), MODEL_O4_MINI);
1640        assert_eq!(provider.provider(), "openai");
1641    }
1642
1643    #[test]
1644    fn test_o1_factory_creates_o1_provider() {
1645        let provider = OpenAIProvider::o1("test-api-key".to_string());
1646
1647        assert_eq!(provider.model(), MODEL_O1);
1648        assert_eq!(provider.provider(), "openai");
1649    }
1650
1651    #[test]
1652    fn test_gpt41_factory_creates_gpt41_provider() {
1653        let provider = OpenAIProvider::gpt41("test-api-key".to_string());
1654
1655        assert_eq!(provider.model(), MODEL_GPT41);
1656        assert_eq!(provider.provider(), "openai");
1657    }
1658
1659    #[test]
1660    fn test_kimi_factory_creates_provider_with_kimi_base_url() {
1661        let provider = OpenAIProvider::kimi("test-api-key".to_string(), "kimi-custom".to_string());
1662
1663        assert_eq!(provider.model(), "kimi-custom");
1664        assert_eq!(provider.base_url, BASE_URL_KIMI);
1665        assert_eq!(provider.provider(), "openai");
1666    }
1667
1668    #[test]
1669    fn test_kimi_k2_5_factory_creates_provider() {
1670        let provider = OpenAIProvider::kimi_k2_5("test-api-key".to_string());
1671
1672        assert_eq!(provider.model(), MODEL_KIMI_K2_5);
1673        assert_eq!(provider.base_url, BASE_URL_KIMI);
1674        assert_eq!(provider.provider(), "openai");
1675    }
1676
1677    #[test]
1678    fn test_kimi_k2_thinking_factory_creates_provider() {
1679        let provider = OpenAIProvider::kimi_k2_thinking("test-api-key".to_string());
1680
1681        assert_eq!(provider.model(), MODEL_KIMI_K2_THINKING);
1682        assert_eq!(provider.base_url, BASE_URL_KIMI);
1683        assert_eq!(provider.provider(), "openai");
1684    }
1685
/// The generic `zai` constructor keeps the caller-supplied model name and
/// points the provider at the Z.ai base URL.
#[test]
fn test_zai_factory_creates_provider_with_zai_base_url() {
    let api_key = "test-api-key".to_string();
    let model = "glm-custom".to_string();
    let zai = OpenAIProvider::zai(api_key, model);

    assert_eq!(zai.base_url, BASE_URL_ZAI);
    assert_eq!(zai.model(), "glm-custom");
    assert_eq!(zai.provider(), "openai");
}
1694
/// The `zai_glm5` preset pairs the GLM-5 model constant with the Z.ai base
/// URL.
#[test]
fn test_zai_glm5_factory_creates_provider() {
    let zai = OpenAIProvider::zai_glm5(String::from("test-api-key"));

    assert_eq!(zai.base_url, BASE_URL_ZAI);
    assert_eq!(zai.model(), MODEL_ZAI_GLM5);
    assert_eq!(zai.provider(), "openai");
}
1703
/// The generic `minimax` constructor keeps the caller-supplied model name and
/// points the provider at the MiniMax base URL.
#[test]
fn test_minimax_factory_creates_provider_with_minimax_base_url() {
    let api_key = "test-api-key".to_string();
    let model = "minimax-custom".to_string();
    let minimax = OpenAIProvider::minimax(api_key, model);

    assert_eq!(minimax.base_url, BASE_URL_MINIMAX);
    assert_eq!(minimax.model(), "minimax-custom");
    assert_eq!(minimax.provider(), "openai");
}
1713
/// The `minimax_m2_5` preset pairs the MiniMax M2.5 model constant with the
/// MiniMax base URL.
#[test]
fn test_minimax_m2_5_factory_creates_provider() {
    let minimax = OpenAIProvider::minimax_m2_5(String::from("test-api-key"));

    assert_eq!(minimax.base_url, BASE_URL_MINIMAX);
    assert_eq!(minimax.model(), MODEL_MINIMAX_M2_5);
    assert_eq!(minimax.provider(), "openai");
}
1722
1723    // ===================
1724    // Model Constants Tests
1725    // ===================
1726
/// Pins the wire values of every model and base-URL constant so an accidental
/// edit to one of them is caught here.
#[test]
fn test_model_constants_have_expected_values() {
    // (constant, expected wire value), grouped by model family.
    let expectations = [
        // GPT-5.4 / GPT-5.3 Codex
        (MODEL_GPT54, "gpt-5.4"),
        (MODEL_GPT53_CODEX, "gpt-5.3-codex"),
        // GPT-5.2 series
        (MODEL_GPT52_INSTANT, "gpt-5.2-instant"),
        (MODEL_GPT52_THINKING, "gpt-5.2-thinking"),
        (MODEL_GPT52_PRO, "gpt-5.2-pro"),
        (MODEL_GPT52_CODEX, "gpt-5.2-codex"),
        // GPT-5 series
        (MODEL_GPT5, "gpt-5"),
        (MODEL_GPT5_MINI, "gpt-5-mini"),
        (MODEL_GPT5_NANO, "gpt-5-nano"),
        // o-series
        (MODEL_O3, "o3"),
        (MODEL_O3_MINI, "o3-mini"),
        (MODEL_O4_MINI, "o4-mini"),
        (MODEL_O1, "o1"),
        (MODEL_O1_MINI, "o1-mini"),
        // GPT-4.1 series
        (MODEL_GPT41, "gpt-4.1"),
        (MODEL_GPT41_MINI, "gpt-4.1-mini"),
        (MODEL_GPT41_NANO, "gpt-4.1-nano"),
        // GPT-4o series
        (MODEL_GPT4O, "gpt-4o"),
        (MODEL_GPT4O_MINI, "gpt-4o-mini"),
        // OpenAI-compatible vendor defaults
        (MODEL_KIMI_K2_5, "kimi-k2.5"),
        (MODEL_KIMI_K2_THINKING, "kimi-k2-thinking"),
        (MODEL_ZAI_GLM5, "glm-5"),
        (MODEL_MINIMAX_M2_5, "MiniMax-M2.5"),
        (BASE_URL_KIMI, "https://api.moonshot.ai/v1"),
        (BASE_URL_ZAI, "https://api.z.ai/api/paas/v4"),
        (BASE_URL_MINIMAX, "https://api.minimax.io/v1"),
    ];

    for &(actual, expected) in &expectations {
        assert_eq!(actual, expected);
    }
}
1763
1764    // ===================
1765    // Clone Tests
1766    // ===================
1767
/// Cloning a provider yields a copy with the same model, provider name and
/// base URL.
#[test]
fn test_provider_is_cloneable() {
    let original = OpenAIProvider::new(String::from("test-api-key"), String::from("test-model"));
    let copy = original.clone();

    assert_eq!(original.base_url, copy.base_url);
    assert_eq!(original.provider(), copy.provider());
    assert_eq!(original.model(), copy.model());
}
1777
1778    // ===================
1779    // API Type Serialization Tests
1780    // ===================
1781
/// Each `ApiRole` variant serializes to its lowercase JSON string.
#[test]
fn test_api_role_serialization() {
    let cases = [
        (ApiRole::System, "\"system\""),
        (ApiRole::User, "\"user\""),
        (ApiRole::Assistant, "\"assistant\""),
        (ApiRole::Tool, "\"tool\""),
    ];

    for (role, expected) in cases.iter() {
        let encoded = serde_json::to_string(role).unwrap();
        assert_eq!(encoded, *expected);
    }
}
1797
/// A plain user message serializes its role and content and leaves out every
/// optional field that is `None`.
#[test]
fn test_api_message_serialization_simple() {
    let message = ApiMessage {
        role: ApiRole::User,
        content: Some("Hello, world!".to_string()),
        reasoning_content: None,
        tool_calls: None,
        tool_call_id: None,
    };

    let json = serde_json::to_string(&message).unwrap();
    assert!(json.contains("\"role\":\"user\""));
    assert!(json.contains("\"content\":\"Hello, world!\""));
    // Optional fields should be omitted. `reasoning_content` is checked too:
    // the `build_api_messages` tests in this module depend on an unset value
    // never reaching the wire, so pin that at the type level as well.
    assert!(!json.contains("tool_calls"));
    assert!(!json.contains("tool_call_id"));
    assert!(!json.contains("reasoning_content"));
}
1815
/// An assistant message carrying a tool call serializes the call's id, type
/// and function name under `tool_calls`.
#[test]
fn test_api_message_serialization_with_tool_calls() {
    let read_file_call = ApiToolCall {
        id: "call_123".to_string(),
        r#type: "function".to_string(),
        function: ApiFunctionCall {
            name: "read_file".to_string(),
            arguments: "{\"path\": \"/test.txt\"}".to_string(),
        },
    };
    let assistant = ApiMessage {
        role: ApiRole::Assistant,
        content: Some("Let me help.".to_string()),
        reasoning_content: None,
        tool_calls: Some(vec![read_file_call]),
        tool_call_id: None,
    };

    let encoded = serde_json::to_string(&assistant).unwrap();
    assert!(encoded.contains("\"role\":\"assistant\""));
    assert!(encoded.contains("\"tool_calls\""));
    assert!(encoded.contains("\"id\":\"call_123\""));
    assert!(encoded.contains("\"type\":\"function\""));
    assert!(encoded.contains("\"name\":\"read_file\""));
}
1840
/// A tool-result message serializes its role, `tool_call_id` and content, and
/// leaves out the optional fields it does not set.
#[test]
fn test_api_tool_message_serialization() {
    let message = ApiMessage {
        role: ApiRole::Tool,
        content: Some("File contents here".to_string()),
        reasoning_content: None,
        tool_calls: None,
        tool_call_id: Some("call_123".to_string()),
    };

    let json = serde_json::to_string(&message).unwrap();
    assert!(json.contains("\"role\":\"tool\""));
    assert!(json.contains("\"tool_call_id\":\"call_123\""));
    assert!(json.contains("\"content\":\"File contents here\""));
    // Unset optional fields must stay off the wire for tool-role messages too.
    // ("tool_calls" is not a substring of "tool_call_id", so this is exact.)
    assert!(!json.contains("tool_calls"));
    assert!(!json.contains("reasoning_content"));
}
1856
/// A function tool definition serializes its type, name, description and a
/// `parameters` key.
#[test]
fn test_api_tool_serialization() {
    let schema = serde_json::json!({
        "type": "object",
        "properties": {
            "arg": {"type": "string"}
        }
    });
    let function = ApiFunction {
        name: "test_tool".to_string(),
        description: "A test tool".to_string(),
        parameters: schema,
    };
    let tool = ApiTool {
        r#type: "function".to_string(),
        function,
    };

    let encoded = serde_json::to_string(&tool).unwrap();
    assert!(encoded.contains("\"type\":\"function\""));
    assert!(encoded.contains("\"name\":\"test_tool\""));
    assert!(encoded.contains("\"description\":\"A test tool\""));
    assert!(encoded.contains("\"parameters\""));
}
1879
1880    // ===================
1881    // API Type Deserialization Tests
1882    // ===================
1883
/// A minimal text-only chat completion payload deserializes with its id,
/// model, usage counters and single choice intact.
#[test]
fn test_api_response_deserialization() {
    let payload = r#"{
            "id": "chatcmpl-123",
            "choices": [
                {
                    "message": {
                        "content": "Hello!"
                    },
                    "finish_reason": "stop"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50
            }
        }"#;

    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.id, "chatcmpl-123");
    assert_eq!(parsed.model, "gpt-4o");
    assert_eq!(parsed.usage.prompt_tokens, 100);
    assert_eq!(parsed.usage.completion_tokens, 50);
    assert_eq!(parsed.choices.len(), 1);

    let first_choice = &parsed.choices[0];
    assert_eq!(first_choice.message.content.as_deref(), Some("Hello!"));
}
1914
/// A completion whose message has `null` content and one tool call
/// deserializes the call's id and function name.
#[test]
fn test_api_response_with_tool_calls_deserialization() {
    let payload = r#"{
            "id": "chatcmpl-456",
            "choices": [
                {
                    "message": {
                        "content": null,
                        "tool_calls": [
                            {
                                "id": "call_abc",
                                "type": "function",
                                "function": {
                                    "name": "read_file",
                                    "arguments": "{\"path\": \"test.txt\"}"
                                }
                            }
                        ]
                    },
                    "finish_reason": "tool_calls"
                }
            ],
            "model": "gpt-4o",
            "usage": {
                "prompt_tokens": 150,
                "completion_tokens": 30
            }
        }"#;

    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    let calls = parsed.choices[0].message.tool_calls.as_ref().unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.id, "call_abc");
    assert_eq!(call.function.name, "read_file");
}
1950
/// An unrecognized `finish_reason` string survives deserialization verbatim
/// and is mapped to `StopReason::StopSequence`.
#[test]
fn test_api_response_with_unknown_finish_reason_deserialization() {
    let payload = r#"{
            "id": "chatcmpl-789",
            "choices": [
                {
                    "message": {
                        "content": "ok"
                    },
                    "finish_reason": "vendor_custom_reason"
                }
            ],
            "model": "glm-5",
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": 5
            }
        }"#;

    let parsed: ApiChatResponse = serde_json::from_str(payload).unwrap();
    let finish_reason = parsed.choices[0].finish_reason.as_deref();

    assert_eq!(finish_reason, Some("vendor_custom_reason"));
    assert_eq!(
        map_finish_reason(finish_reason.unwrap()),
        StopReason::StopSequence
    );
}
1980
/// Pins the `finish_reason` -> `StopReason` mapping for the standard values,
/// a few vendor-specific ones, and an unknown value.
#[test]
fn test_map_finish_reason_covers_vendor_specific_values() {
    let cases = [
        ("stop", StopReason::EndTurn),
        ("tool_calls", StopReason::ToolUse),
        ("length", StopReason::MaxTokens),
        ("content_filter", StopReason::StopSequence),
        ("sensitive", StopReason::Refusal),
        ("network_error", StopReason::StopSequence),
        ("some_new_reason", StopReason::StopSequence),
    ];

    for (raw, expected) in cases.iter() {
        assert_eq!(map_finish_reason(*raw), *expected);
    }
}
1997
1998    // ===================
1999    // Message Conversion Tests
2000    // ===================
2001
/// A non-empty system prompt is emitted as a leading system message, followed
/// by the user message.
#[test]
fn test_build_api_messages_with_system() {
    use agent_sdk_foundation::llm::Message;

    let request = ChatRequest {
        messages: vec![Message::user("Hello")],
        system: "You are helpful.".to_string(),
        max_tokens: 1024,
        max_tokens_explicit: true,
        tools: None,
        tool_choice: None,
        thinking: None,
        response_format: None,
        session_id: None,
        cached_content: None,
    };

    let api_messages = build_api_messages(&request);
    assert_eq!(api_messages.len(), 2);

    let (system, user) = (&api_messages[0], &api_messages[1]);
    assert_eq!(system.role, ApiRole::System);
    assert_eq!(system.content.as_deref(), Some("You are helpful."));
    assert_eq!(user.role, ApiRole::User);
    assert_eq!(user.content.as_deref(), Some("Hello"));
}
2027
/// An empty system prompt produces no system message: only the user message
/// is emitted.
#[test]
fn test_build_api_messages_empty_system() {
    use agent_sdk_foundation::llm::Message;

    let request = ChatRequest {
        messages: vec![Message::user("Hello")],
        system: String::new(),
        max_tokens: 1024,
        max_tokens_explicit: true,
        tools: None,
        tool_choice: None,
        thinking: None,
        response_format: None,
        session_id: None,
        cached_content: None,
    };

    let api_messages = build_api_messages(&request);
    assert_eq!(api_messages.len(), 1);
    assert_eq!(api_messages[0].role, ApiRole::User);
}
2047
2048    fn request_with_messages(messages: Vec<agent_sdk_foundation::llm::Message>) -> ChatRequest {
2049        ChatRequest {
2050            system: String::new(),
2051            messages,
2052            tools: None,
2053            max_tokens: 1024,
2054            max_tokens_explicit: true,
2055            session_id: None,
2056            cached_content: None,
2057            thinking: None,
2058            tool_choice: None,
2059            response_format: None,
2060        }
2061    }
2062
/// An assistant turn with both a Thinking block and a tool call echoes the
/// thinking text back as `reasoning_content`.
#[test]
fn test_build_api_messages_echoes_assistant_reasoning_content_on_tool_call()
-> anyhow::Result<()> {
    use agent_sdk_foundation::llm::Message;

    // DeepSeek V4 thinking-mode needs the previous assistant turn's reasoning
    // echoed back as `reasoning_content`, but ONLY on a turn that also made a
    // tool call; otherwise the API 400s.
    let thinking = ContentBlock::Thinking {
        thinking: "I should call the weather tool.".to_string(),
        signature: None,
    };
    let tool_use = ContentBlock::ToolUse {
        id: "call_1".to_string(),
        name: "get_weather".to_string(),
        input: serde_json::json!({"city": "Paris"}),
        thought_signature: None,
    };
    let request = request_with_messages(vec![
        Message::user("What is the weather?"),
        Message::assistant_with_content(vec![thinking, tool_use]),
    ]);

    let api_messages = build_api_messages(&request);
    let assistant = api_messages
        .iter()
        .find(|message| message.role == ApiRole::Assistant)
        .context("assistant message present")?;

    assert!(assistant.tool_calls.is_some());
    assert_eq!(
        assistant.reasoning_content.as_deref(),
        Some("I should call the weather tool.")
    );
    Ok(())
}
2097
/// On a tool-call turn the echoed reasoning appears in the serialized JSON
/// under the `reasoning_content` key.
#[test]
fn test_build_api_messages_reasoning_content_serializes_on_tool_call_turn() -> anyhow::Result<()>
{
    use agent_sdk_foundation::llm::Message;

    let thinking = ContentBlock::Thinking {
        thinking: "thinking out loud".to_string(),
        signature: None,
    };
    let tool_use = ContentBlock::ToolUse {
        id: "call_1".to_string(),
        name: "do_thing".to_string(),
        input: serde_json::json!({}),
        thought_signature: None,
    };
    let request =
        request_with_messages(vec![Message::assistant_with_content(vec![thinking, tool_use])]);

    let api_messages = build_api_messages(&request);
    let encoded = serde_json::to_string(&api_messages).context("serialize api messages")?;
    assert!(encoded.contains("\"reasoning_content\":\"thinking out loud\""));
    Ok(())
}
2121
/// An assistant turn containing only a Thinking block is dropped entirely and
/// never emits `reasoning_content`.
#[test]
fn test_build_api_messages_reasoning_only_turn_is_not_echoed() -> anyhow::Result<()> {
    use agent_sdk_foundation::llm::Message;

    // A turn with reasoning but no visible text and no tool call must NOT
    // carry reasoning_content: legacy `deepseek-reasoner` 400s when it shows
    // up in input, and DeepSeek V4 thinking-mode only needs it on tool-call
    // turns. Having nothing else to send, the turn collapses to nothing and
    // is dropped.
    let pondering = ContentBlock::Thinking {
        thinking: "pondering".to_string(),
        signature: None,
    };
    let request = request_with_messages(vec![Message::assistant_with_content(vec![pondering])]);

    let api_messages = build_api_messages(&request);
    let encoded = serde_json::to_string(&api_messages).context("serialize api messages")?;
    assert!(!encoded.contains("reasoning_content"));
    assert!(api_messages.is_empty());
    Ok(())
}
2144
/// An assistant turn with reasoning and visible text but no tool call keeps
/// its text and drops the reasoning.
#[test]
fn test_build_api_messages_reasoning_with_text_no_tool_call_is_not_echoed() -> anyhow::Result<()>
{
    use agent_sdk_foundation::llm::Message;

    // Reasoning + visible text with NO tool call: the turn is emitted for its
    // text, but the reasoning is NOT echoed back.
    let thinking = ContentBlock::Thinking {
        thinking: "Let me add 2 and 2.".to_string(),
        signature: None,
    };
    let answer = ContentBlock::Text {
        text: "4".to_string(),
    };
    let request = request_with_messages(vec![
        Message::user("What is 2+2?"),
        Message::assistant_with_content(vec![thinking, answer]),
        Message::user("And 3+3?"),
    ]);

    let api_messages = build_api_messages(&request);
    let encoded = serde_json::to_string(&api_messages).context("serialize api messages")?;
    assert!(!encoded.contains("reasoning_content"));

    let assistant = api_messages
        .iter()
        .find(|message| message.role == ApiRole::Assistant)
        .context("assistant message present")?;
    assert_eq!(assistant.content.as_deref(), Some("4"));
    assert_eq!(assistant.reasoning_content, None);
    Ok(())
}
2175
/// An assistant turn without any Thinking block never gets
/// `reasoning_content` attached.
#[test]
fn test_build_api_messages_normal_path_has_no_reasoning_content() -> anyhow::Result<()> {
    use agent_sdk_foundation::llm::Message;

    // Baseline: with no Thinking block present there is nothing to echo.
    let reply = ContentBlock::Text {
        text: "hello".to_string(),
    };
    let request = request_with_messages(vec![
        Message::user("hi"),
        Message::assistant_with_content(vec![reply]),
    ]);

    let api_messages = build_api_messages(&request);
    let encoded = serde_json::to_string(&api_messages).context("serialize api messages")?;
    assert!(!encoded.contains("reasoning_content"));

    let assistant = api_messages
        .iter()
        .find(|message| message.role == ApiRole::Assistant)
        .context("assistant message present")?;
    assert_eq!(assistant.reasoning_content, None);
    Ok(())
}
2197
/// A Thinking block inside a user turn is not converted into a
/// `reasoning_content` echo.
#[test]
fn test_build_api_messages_does_not_attach_reasoning_to_user_blocks() {
    use agent_sdk_foundation::llm::Message;

    // Unusual but possible: a user turn that carries a Thinking block.
    let thinking = ContentBlock::Thinking {
        thinking: "user-side thinking".to_string(),
        signature: None,
    };
    let question = ContentBlock::Text {
        text: "question".to_string(),
    };
    let request = request_with_messages(vec![Message::user_with_content(vec![
        thinking, question,
    ])]);

    let api_messages = build_api_messages(&request);
    assert_eq!(api_messages.len(), 1);

    let user = &api_messages[0];
    assert_eq!(user.role, ApiRole::User);
    assert_eq!(user.reasoning_content, None);
}
2220
/// `convert_tool` produces a `function`-typed API tool that carries over the
/// SDK tool's name and description.
#[test]
fn test_convert_tool() {
    use agent_sdk_foundation::llm::Tool;

    let sdk_tool = Tool {
        name: "test_tool".to_string(),
        display_name: "Test Tool".to_string(),
        description: "A test tool".to_string(),
        input_schema: serde_json::json!({"type": "object"}),
        tier: agent_sdk_foundation::ToolTier::Observe,
    };

    let converted = convert_tool(sdk_tool);
    assert_eq!(converted.r#type, "function");

    let function = &converted.function;
    assert_eq!(function.name, "test_tool");
    assert_eq!(function.description, "A test tool");
}
2236
/// A response with only text content yields exactly one Text block.
#[test]
fn test_build_content_blocks_text_only() {
    let response = ApiResponseMessage {
        content: Some("Hello!".to_string()),
        reasoning_content: None,
        reasoning: None,
        tool_calls: None,
    };

    let blocks = build_content_blocks(&response);
    assert!(matches!(
        blocks.as_slice(),
        [ContentBlock::Text { text }] if text == "Hello!"
    ));
}
2250
/// Text content plus one tool call yields a Text block followed by a ToolUse
/// block carrying the call's id and name.
#[test]
fn test_build_content_blocks_with_tool_calls() {
    let read_file_call = ApiResponseToolCall {
        id: "call_123".to_string(),
        function: ApiResponseFunctionCall {
            name: "read_file".to_string(),
            arguments: "{\"path\": \"test.txt\"}".to_string(),
        },
    };
    let response = ApiResponseMessage {
        content: Some("Let me help.".to_string()),
        reasoning_content: None,
        reasoning: None,
        tool_calls: Some(vec![read_file_call]),
    };

    let blocks = build_content_blocks(&response);
    assert_eq!(blocks.len(), 2);

    let (first, second) = (&blocks[0], &blocks[1]);
    assert!(matches!(first, ContentBlock::Text { text } if text == "Let me help."));
    assert!(matches!(
        second,
        ContentBlock::ToolUse { id, name, .. } if id == "call_123" && name == "read_file"
    ));
}
2273
/// With `content` absent, `reasoning_content` is surfaced as a single
/// unsigned Thinking block.
#[test]
fn test_build_content_blocks_falls_back_to_reasoning_content_when_content_empty() {
    // DeepSeek-style responses can put the usable output in reasoning_content
    // and leave content null; without the fallback all output was dropped.
    let response = ApiResponseMessage {
        content: None,
        reasoning_content: Some("The answer is 42.".to_string()),
        reasoning: None,
        tool_calls: None,
    };

    let blocks = build_content_blocks(&response);
    assert!(matches!(
        blocks.as_slice(),
        [ContentBlock::Thinking { thinking, signature }]
            if thinking == "The answer is 42." && signature.is_none()
    ));
}
2291
/// With empty-string `content`, the `reasoning` field is surfaced as a single
/// Thinking block.
#[test]
fn test_build_content_blocks_falls_back_to_reasoning_field() {
    // Some OpenRouter upstreams normalize reasoning under `reasoning`.
    let response = ApiResponseMessage {
        content: Some(String::new()),
        reasoning_content: None,
        reasoning: Some("Considering options...".to_string()),
        tool_calls: None,
    };

    let blocks = build_content_blocks(&response);
    assert_eq!(blocks.len(), 1);

    let only = &blocks[0];
    assert!(matches!(
        only,
        ContentBlock::Thinking { thinking, .. } if thinking == "Considering options..."
    ));
}
2308
/// When both reasoning fields are set, `reasoning_content` wins over
/// `reasoning`.
#[test]
fn test_build_content_blocks_prefers_reasoning_content_over_reasoning() {
    let response = ApiResponseMessage {
        content: None,
        reasoning_content: Some("primary".to_string()),
        reasoning: Some("secondary".to_string()),
        tool_calls: None,
    };

    let blocks = build_content_blocks(&response);
    assert_eq!(blocks.len(), 1);

    let only = &blocks[0];
    assert!(matches!(
        only,
        ContentBlock::Thinking { thinking, .. } if thinking == "primary"
    ));
}
2324
/// When usable text content is present, reasoning is not surfaced: the only
/// block is the Text block.
#[test]
fn test_build_content_blocks_does_not_add_reasoning_when_content_present() {
    // The ordinary content-present path must stay as it was: no Thinking
    // block is produced alongside usable text.
    let response = ApiResponseMessage {
        content: Some("Final answer.".to_string()),
        reasoning_content: Some("internal chain of thought".to_string()),
        reasoning: None,
        tool_calls: None,
    };

    let blocks = build_content_blocks(&response);
    assert!(matches!(
        blocks.as_slice(),
        [ContentBlock::Text { text }] if text == "Final answer."
    ));
}
2340
/// Absent content with reasoning and a tool call yields a Thinking block
/// followed by the ToolUse block.
#[test]
fn test_build_content_blocks_reasoning_fallback_with_tool_calls() {
    // A reasoning model under a tight max_tokens budget may produce no
    // content yet still tool-call: surface both the reasoning and the call.
    let search_call = ApiResponseToolCall {
        id: "call_1".to_string(),
        function: ApiResponseFunctionCall {
            name: "search".to_string(),
            arguments: "{}".to_string(),
        },
    };
    let response = ApiResponseMessage {
        content: None,
        reasoning_content: Some("I should search.".to_string()),
        reasoning: None,
        tool_calls: Some(vec![search_call]),
    };

    let blocks = build_content_blocks(&response);
    assert_eq!(blocks.len(), 2);

    let (first, second) = (&blocks[0], &blocks[1]);
    assert!(matches!(
        first,
        ContentBlock::Thinking { thinking, .. } if thinking == "I should search."
    ));
    assert!(matches!(second, ContentBlock::ToolUse { name, .. } if name == "search"));
}
2365
/// A message with no content, no tool calls and no reasoning produces no
/// blocks at all.
#[test]
fn test_build_content_blocks_empty_message_yields_no_blocks() {
    // Genuine truncation without any reasoning text: the empty case still
    // yields nothing.
    let response = ApiResponseMessage {
        content: None,
        reasoning_content: None,
        reasoning: None,
        tool_calls: None,
    };

    assert!(build_content_blocks(&response).is_empty());
}
2380
/// A response message with `null` content and a `reasoning_content` string
/// deserializes so that `reasoning_text` returns that string.
#[test]
fn test_api_response_message_deserializes_reasoning_content() {
    let payload = r#"{
            "content": null,
            "reasoning_content": "step by step"
        }"#;

    let parsed: ApiResponseMessage = serde_json::from_str(payload).unwrap();
    assert!(parsed.content.is_none());
    assert_eq!(reasoning_text(&parsed), Some("step by step"));
}
2392
2393    // ===================
2394    // SSE Streaming Type Tests
2395    // ===================
2396
/// A streaming chunk with a text delta and `null` finish reason deserializes
/// into one choice carrying that text.
#[test]
fn test_sse_chunk_text_delta_deserialization() {
    let payload = r#"{
            "choices": [{
                "delta": {
                    "content": "Hello"
                },
                "finish_reason": null
            }]
        }"#;

    let parsed: SseChunk = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.choices.len(), 1);

    let choice = &parsed.choices[0];
    assert_eq!(choice.delta.content.as_deref(), Some("Hello"));
    assert!(choice.finish_reason.is_none());
}
2413
/// The opening tool-call delta deserializes with its index, id and function
/// name.
#[test]
fn test_sse_chunk_tool_call_delta_deserialization() {
    let payload = r#"{
            "choices": [{
                "delta": {
                    "tool_calls": [{
                        "index": 0,
                        "id": "call_abc",
                        "function": {
                            "name": "read_file",
                            "arguments": ""
                        }
                    }]
                },
                "finish_reason": null
            }]
        }"#;

    let parsed: SseChunk = serde_json::from_str(payload).unwrap();
    let calls = parsed.choices[0].delta.tool_calls.as_ref().unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.index, 0);
    assert_eq!(call.id.as_deref(), Some("call_abc"));

    let function = call.function.as_ref().unwrap();
    assert_eq!(function.name.as_deref(), Some("read_file"));
}
2442
/// A follow-up tool-call delta with only an arguments fragment deserializes
/// with no id and the fragment preserved.
#[test]
fn test_sse_chunk_tool_call_arguments_delta_deserialization() {
    let payload = r#"{
            "choices": [{
                "delta": {
                    "tool_calls": [{
                        "index": 0,
                        "function": {
                            "arguments": "{\"path\":"
                        }
                    }]
                },
                "finish_reason": null
            }]
        }"#;

    let parsed: SseChunk = serde_json::from_str(payload).unwrap();
    let calls = parsed.choices[0].delta.tool_calls.as_ref().unwrap();

    let call = &calls[0];
    assert_eq!(call.id, None);

    let function = call.function.as_ref().unwrap();
    assert_eq!(function.arguments.as_deref(), Some("{\"path\":"));
}
2467
/// A chunk with an empty delta and a `stop` finish reason exposes that
/// finish reason.
#[test]
fn test_sse_chunk_with_finish_reason_deserialization() {
    let payload = r#"{
            "choices": [{
                "delta": {},
                "finish_reason": "stop"
            }]
        }"#;

    let parsed: SseChunk = serde_json::from_str(payload).unwrap();
    let choice = &parsed.choices[0];
    assert_eq!(choice.finish_reason.as_deref(), Some("stop"));
}
2480
/// A chunk with a `usage` object deserializes the prompt and completion
/// token counts.
#[test]
fn test_sse_chunk_with_usage_deserialization() {
    let payload = r#"{
            "choices": [{
                "delta": {},
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50
            }
        }"#;

    let parsed: SseChunk = serde_json::from_str(payload).unwrap();
    let token_usage = parsed.usage.unwrap();
    assert_eq!(token_usage.prompt_tokens, 100);
    assert_eq!(token_usage.completion_tokens, 50);
}
2499
2500    #[test]
2501    fn test_sse_chunk_with_float_usage_deserialization() {
2502        let json = r#"{
2503            "choices": [{
2504                "delta": {},
2505                "finish_reason": "stop"
2506            }],
2507            "usage": {
2508                "prompt_tokens": 100.0,
2509                "completion_tokens": 50.0
2510            }
2511        }"#;
2512
2513        let chunk: SseChunk = serde_json::from_str(json).unwrap();
2514        let usage = chunk.usage.unwrap();
2515        assert_eq!(usage.prompt_tokens, 100);
2516        assert_eq!(usage.completion_tokens, 50);
2517    }
2518
2519    #[test]
2520    fn test_api_usage_deserializes_integer_compatible_numbers() {
2521        let json = r#"{
2522            "prompt_tokens": 42.0,
2523            "completion_tokens": 7
2524        }"#;
2525
2526        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2527        assert_eq!(usage.prompt_tokens, 42);
2528        assert_eq!(usage.completion_tokens, 7);
2529    }
2530
2531    #[test]
2532    fn test_api_usage_deserializes_cached_tokens() {
2533        let json = r#"{
2534            "prompt_tokens": 42,
2535            "completion_tokens": 7,
2536            "prompt_tokens_details": {
2537                "cached_tokens": 10
2538            }
2539        }"#;
2540
2541        let usage: ApiUsage = serde_json::from_str(json).unwrap();
2542        assert_eq!(usage.prompt_tokens, 42);
2543        assert_eq!(usage.completion_tokens, 7);
2544        assert_eq!(usage.prompt_tokens_details.unwrap().cached_tokens, 10);
2545    }
2546
2547    #[test]
2548    fn test_process_sse_data_maps_cached_tokens_to_cache_read_usage() {
2549        let results = process_sse_data(
2550            r#"{
2551                "choices": [],
2552                "usage": {
2553                    "prompt_tokens": 42,
2554                    "completion_tokens": 7,
2555                    "prompt_tokens_details": {
2556                        "cached_tokens": 10
2557                    }
2558                }
2559            }"#,
2560        );
2561
2562        assert!(matches!(
2563            results.as_slice(),
2564            [SseProcessResult::Usage(Usage {
2565                input_tokens: 42,
2566                output_tokens: 7,
2567                cached_input_tokens: 10,
2568                cache_creation_input_tokens: 0,
2569            })]
2570        ));
2571    }
2572
2573    #[test]
2574    fn test_sse_delta_deserializes_reasoning_fields() -> anyhow::Result<()> {
2575        // The streaming delta struct must accept DeepSeek `reasoning_content`
2576        // and OpenRouter-normalized `reasoning` so reasoning tokens are not
2577        // dropped on deserialization.
2578        let chunk: SseChunk = serde_json::from_str(
2579            r#"{
2580                "choices": [{
2581                    "delta": {
2582                        "reasoning_content": "step one"
2583                    },
2584                    "finish_reason": null
2585                }]
2586            }"#,
2587        )
2588        .context("deserialize sse chunk")?;
2589        assert_eq!(
2590            chunk.choices[0].delta.reasoning_content,
2591            Some("step one".to_string())
2592        );
2593        assert!(chunk.choices[0].delta.content.is_none());
2594        Ok(())
2595    }
2596
2597    #[test]
2598    fn test_process_sse_data_emits_thinking_delta_from_reasoning_content() {
2599        // Reasoning-model fallback under streaming: a delta whose visible
2600        // `content` is absent but whose `reasoning_content` carries tokens must
2601        // surface as a ThinkingDelta, mirroring the non-streaming fallback so the
2602        // output is not silently dropped.
2603        let results = process_sse_data(
2604            r#"{
2605                "choices": [{
2606                    "delta": { "reasoning_content": "thinking..." },
2607                    "finish_reason": null
2608                }]
2609            }"#,
2610        );
2611
2612        assert!(matches!(
2613            results.as_slice(),
2614            [SseProcessResult::ThinkingDelta(text)] if text == "thinking..."
2615        ));
2616    }
2617
2618    #[test]
2619    fn test_process_sse_data_emits_thinking_delta_from_reasoning_field() {
2620        // OpenRouter-normalized `reasoning` field is an equivalent fallback.
2621        let results = process_sse_data(
2622            r#"{
2623                "choices": [{
2624                    "delta": { "reasoning": "pondering" },
2625                    "finish_reason": null
2626                }]
2627            }"#,
2628        );
2629
2630        assert!(matches!(
2631            results.as_slice(),
2632            [SseProcessResult::ThinkingDelta(text)] if text == "pondering"
2633        ));
2634    }
2635
2636    #[test]
2637    fn test_process_sse_data_prefers_text_content_over_reasoning() {
2638        // When visible `content` is present, it takes precedence and the
2639        // reasoning fallback does not fire (mirrors non-streaming behavior).
2640        let results = process_sse_data(
2641            r#"{
2642                "choices": [{
2643                    "delta": {
2644                        "content": "answer",
2645                        "reasoning_content": "ignored"
2646                    },
2647                    "finish_reason": null
2648                }]
2649            }"#,
2650        );
2651
2652        assert!(matches!(
2653            results.as_slice(),
2654            [SseProcessResult::TextDelta(text)] if text == "answer"
2655        ));
2656    }
2657
2658    #[test]
2659    fn test_process_sse_data_empty_content_falls_back_to_reasoning() {
2660        // An explicitly empty `content` string must still trigger the reasoning
2661        // fallback rather than emitting an empty TextDelta.
2662        let results = process_sse_data(
2663            r#"{
2664                "choices": [{
2665                    "delta": {
2666                        "content": "",
2667                        "reasoning_content": "fallback"
2668                    },
2669                    "finish_reason": null
2670                }]
2671            }"#,
2672        );
2673
2674        assert!(matches!(
2675            results.as_slice(),
2676            [SseProcessResult::ThinkingDelta(text)] if text == "fallback"
2677        ));
2678    }
2679
2680    #[test]
2681    fn test_api_usage_rejects_fractional_numbers() {
2682        let json = r#"{
2683            "prompt_tokens": 42.5,
2684            "completion_tokens": 7
2685        }"#;
2686
2687        let usage: std::result::Result<ApiUsage, _> = serde_json::from_str(json);
2688        assert!(usage.is_err());
2689    }
2690
2691    #[test]
2692    fn test_use_max_tokens_alias_for_vendor_urls() {
2693        assert!(!use_max_tokens_alias(DEFAULT_BASE_URL));
2694        assert!(use_max_tokens_alias(BASE_URL_KIMI));
2695        assert!(use_max_tokens_alias(BASE_URL_ZAI));
2696        assert!(use_max_tokens_alias(BASE_URL_MINIMAX));
2697    }
2698
2699    #[test]
2700    fn test_requires_responses_api_only_for_legacy_codex_model() {
2701        assert!(requires_responses_api(MODEL_GPT52_CODEX));
2702        assert!(!requires_responses_api(MODEL_GPT53_CODEX));
2703        assert!(!requires_responses_api(MODEL_GPT54));
2704    }
2705
2706    #[test]
2707    fn test_should_use_responses_api_for_official_agentic_requests() {
2708        let request = ChatRequest {
2709            system: String::new(),
2710            messages: vec![agent_sdk_foundation::llm::Message::user("Hello")],
2711            tools: Some(vec![agent_sdk_foundation::llm::Tool {
2712                name: "read_file".to_string(),
2713                description: "Read a file".to_string(),
2714                input_schema: serde_json::json!({"type": "object"}),
2715                display_name: "Read File".to_string(),
2716                tier: agent_sdk_foundation::ToolTier::Observe,
2717            }]),
2718            max_tokens: 1024,
2719            max_tokens_explicit: true,
2720            session_id: Some("thread-1".to_string()),
2721            cached_content: None,
2722            thinking: None,
2723            tool_choice: None,
2724            response_format: None,
2725        };
2726
2727        assert!(should_use_responses_api(
2728            DEFAULT_BASE_URL,
2729            MODEL_GPT54,
2730            &request
2731        ));
2732        assert!(!should_use_responses_api(
2733            BASE_URL_KIMI,
2734            MODEL_GPT54,
2735            &request
2736        ));
2737    }
2738
2739    #[test]
2740    fn test_build_api_reasoning_maps_enabled_budget_to_effort() {
2741        let reasoning = build_api_reasoning(Some(&ThinkingConfig::new(40_000))).unwrap();
2742        assert!(matches!(reasoning.effort, ReasoningEffort::XHigh));
2743    }
2744
2745    #[test]
2746    fn test_build_api_reasoning_uses_explicit_effort() {
2747        let reasoning =
2748            build_api_reasoning(Some(&ThinkingConfig::adaptive_with_effort(Effort::High))).unwrap();
2749        assert!(matches!(reasoning.effort, ReasoningEffort::High));
2750    }
2751
2752    #[test]
2753    fn test_build_api_reasoning_omits_adaptive_without_effort() {
2754        assert!(build_api_reasoning(Some(&ThinkingConfig::adaptive())).is_none());
2755    }
2756
2757    #[test]
2758    fn test_openai_rejects_adaptive_thinking() {
2759        let provider = OpenAIProvider::gpt54("test-key".to_string());
2760        let error = provider
2761            .validate_thinking_config(Some(&ThinkingConfig::adaptive()))
2762            .unwrap_err();
2763        assert!(
2764            error
2765                .to_string()
2766                .contains("adaptive thinking is not supported")
2767        );
2768    }
2769
2770    #[test]
2771    fn test_openai_non_reasoning_models_reject_thinking() {
2772        let provider = OpenAIProvider::gpt4o("test-key".to_string());
2773        let error = provider
2774            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
2775            .unwrap_err();
2776        assert!(error.to_string().contains("thinking is not supported"));
2777    }
2778
2779    #[test]
2780    fn test_request_serialization_openai_uses_max_completion_tokens_only() {
2781        let messages = vec![ApiMessage {
2782            role: ApiRole::User,
2783            content: Some("Hello".to_string()),
2784            reasoning_content: None,
2785            tool_calls: None,
2786            tool_call_id: None,
2787        }];
2788
2789        let request = ApiChatRequest {
2790            model: "gpt-4o",
2791            messages: &messages,
2792            max_completion_tokens: Some(1024),
2793            max_tokens: None,
2794            tools: None,
2795            tool_choice: None,
2796            reasoning: None,
2797            response_format: None,
2798        };
2799
2800        let json = serde_json::to_string(&request).unwrap();
2801        assert!(json.contains("\"max_completion_tokens\":1024"));
2802        assert!(!json.contains("\"max_tokens\""));
2803    }
2804
2805    #[test]
2806    fn test_request_serialization_with_max_tokens_alias() {
2807        let messages = vec![ApiMessage {
2808            role: ApiRole::User,
2809            content: Some("Hello".to_string()),
2810            reasoning_content: None,
2811            tool_calls: None,
2812            tool_call_id: None,
2813        }];
2814
2815        let request = ApiChatRequest {
2816            model: "glm-5",
2817            messages: &messages,
2818            max_completion_tokens: Some(1024),
2819            max_tokens: Some(1024),
2820            tools: None,
2821            tool_choice: None,
2822            reasoning: None,
2823            response_format: None,
2824        };
2825
2826        let json = serde_json::to_string(&request).unwrap();
2827        assert!(json.contains("\"max_completion_tokens\":1024"));
2828        assert!(json.contains("\"max_tokens\":1024"));
2829    }
2830
2831    #[test]
2832    fn test_streaming_request_serialization_openai_default() {
2833        let messages = vec![ApiMessage {
2834            role: ApiRole::User,
2835            content: Some("Hello".to_string()),
2836            reasoning_content: None,
2837            tool_calls: None,
2838            tool_call_id: None,
2839        }];
2840
2841        let request = ApiChatRequestStreaming {
2842            model: "gpt-4o",
2843            messages: &messages,
2844            max_completion_tokens: Some(1024),
2845            max_tokens: None,
2846            tools: None,
2847            tool_choice: None,
2848            reasoning: None,
2849            response_format: None,
2850            stream_options: Some(ApiStreamOptions {
2851                include_usage: true,
2852            }),
2853            usage: None,
2854            stream: true,
2855        };
2856
2857        let json = serde_json::to_string(&request).unwrap();
2858        assert!(json.contains("\"stream\":true"));
2859        assert!(json.contains("\"model\":\"gpt-4o\""));
2860        assert!(json.contains("\"max_completion_tokens\":1024"));
2861        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2862        assert!(!json.contains("\"max_tokens\""));
2863    }
2864
2865    #[test]
2866    fn stream_usage_is_requested_for_every_endpoint() {
2867        // issue #302: usage must be requested on ALL OpenAI-compatible
2868        // endpoints, not just api.openai.com, so OpenRouter/Baseten/local
2869        // turns report token usage to cost ledgers and budgets.
2870        assert!(use_stream_usage_options("https://api.openai.com/v1"));
2871        assert!(use_stream_usage_options("https://openrouter.ai/api/v1"));
2872        assert!(use_stream_usage_options("https://host.baseten.co/v1"));
2873        assert!(use_stream_usage_options("http://localhost:1234/v1"));
2874    }
2875
2876    #[test]
2877    fn openrouter_usage_flag_only_for_openrouter() {
2878        assert!(use_openrouter_usage_options("https://openrouter.ai/api/v1"));
2879        assert!(!use_openrouter_usage_options("https://api.openai.com/v1"));
2880    }
2881
2882    #[test]
2883    fn streaming_request_serializes_openrouter_usage_flag() -> anyhow::Result<()> {
2884        let messages = vec![ApiMessage {
2885            role: ApiRole::User,
2886            content: Some("hi".to_string()),
2887            reasoning_content: None,
2888            tool_calls: None,
2889            tool_call_id: None,
2890        }];
2891        let request = ApiChatRequestStreaming {
2892            model: "anthropic/claude-3.5",
2893            messages: &messages,
2894            max_completion_tokens: Some(16),
2895            max_tokens: None,
2896            tools: None,
2897            tool_choice: None,
2898            reasoning: None,
2899            response_format: None,
2900            stream_options: Some(ApiStreamOptions {
2901                include_usage: true,
2902            }),
2903            usage: Some(ApiOpenRouterUsageOptions { include: true }),
2904            stream: true,
2905        };
2906        let json = serde_json::to_string(&request)?;
2907        assert!(json.contains("\"usage\":{\"include\":true}"));
2908        assert!(json.contains("\"stream_options\":{\"include_usage\":true}"));
2909        Ok(())
2910    }
2911
2912    #[test]
2913    fn usage_only_chunk_without_choices_deserializes() -> anyhow::Result<()> {
2914        // OpenAI's trailing usage frame (and some OpenRouter frames) omit
2915        // `choices` entirely; the chunk must still deserialize so the usage is
2916        // captured instead of being silently dropped (issue #302).
2917        let no_choices: SseChunk = serde_json::from_str("{}")?;
2918        assert!(no_choices.choices.is_empty());
2919
2920        let usage_only: SseChunk =
2921            serde_json::from_str(r#"{"usage":{"prompt_tokens":10,"completion_tokens":5}}"#)?;
2922        assert!(usage_only.choices.is_empty());
2923        assert!(usage_only.usage.is_some());
2924        Ok(())
2925    }
2926
2927    #[test]
2928    fn test_streaming_request_serialization_with_max_tokens_alias() {
2929        let messages = vec![ApiMessage {
2930            role: ApiRole::User,
2931            content: Some("Hello".to_string()),
2932            reasoning_content: None,
2933            tool_calls: None,
2934            tool_call_id: None,
2935        }];
2936
2937        let request = ApiChatRequestStreaming {
2938            model: "kimi-k2-thinking",
2939            messages: &messages,
2940            max_completion_tokens: Some(1024),
2941            max_tokens: Some(1024),
2942            tools: None,
2943            tool_choice: None,
2944            reasoning: None,
2945            response_format: None,
2946            stream_options: None,
2947            usage: None,
2948            stream: true,
2949        };
2950
2951        let json = serde_json::to_string(&request).unwrap();
2952        assert!(json.contains("\"max_completion_tokens\":1024"));
2953        assert!(json.contains("\"max_tokens\":1024"));
2954        assert!(!json.contains("\"stream_options\""));
2955    }
2956
2957    #[test]
2958    fn test_request_serialization_includes_reasoning_when_present() {
2959        let messages = vec![ApiMessage {
2960            role: ApiRole::User,
2961            content: Some("Hello".to_string()),
2962            reasoning_content: None,
2963            tool_calls: None,
2964            tool_call_id: None,
2965        }];
2966
2967        let request = ApiChatRequest {
2968            model: MODEL_GPT54,
2969            messages: &messages,
2970            max_completion_tokens: Some(1024),
2971            max_tokens: None,
2972            tools: None,
2973            tool_choice: None,
2974            reasoning: Some(ApiReasoning {
2975                effort: ReasoningEffort::High,
2976            }),
2977            response_format: None,
2978        };
2979
2980        let json = serde_json::to_string(&request).unwrap();
2981        assert!(json.contains("\"reasoning\":{\"effort\":\"high\"}"));
2982    }
2983
2984    #[test]
2985    fn test_response_format_serializes_as_json_schema() {
2986        let messages = vec![ApiMessage {
2987            role: ApiRole::User,
2988            content: Some("Hello".to_string()),
2989            reasoning_content: None,
2990            tool_calls: None,
2991            tool_call_id: None,
2992        }];
2993
2994        let response_format = Some(ApiResponseFormat::from_response_format(
2995            &agent_sdk_foundation::llm::ResponseFormat::new(
2996                "person",
2997                serde_json::json!({"type": "object"}),
2998            ),
2999        ));
3000
3001        let request = ApiChatRequest {
3002            model: "gpt-4o",
3003            messages: &messages,
3004            max_completion_tokens: Some(1024),
3005            max_tokens: None,
3006            tools: None,
3007            tool_choice: None,
3008            reasoning: None,
3009            response_format,
3010        };
3011
3012        let json = serde_json::to_value(&request).unwrap();
3013        assert_eq!(json["response_format"]["type"], "json_schema");
3014        assert_eq!(json["response_format"]["json_schema"]["name"], "person");
3015        assert_eq!(json["response_format"]["json_schema"]["strict"], true);
3016        assert_eq!(
3017            json["response_format"]["json_schema"]["schema"]["type"],
3018            "object"
3019        );
3020    }
3021
3022    #[test]
3023    fn test_step_completion_stream_emits_trailing_usage_after_finish_reason() {
3024        // Official OpenAI with stream_options.include_usage sends the usage in a
3025        // SEPARATE chunk (choices: []) AFTER the finish_reason chunk, then [DONE].
3026        // The streaming loop must keep consuming past finish_reason so that usage
3027        // is captured and emitted (previously it returned early on Done, dropping
3028        // the usage entirely).
3029        let mut tool_calls: HashMap<usize, ToolCallAccumulator> = HashMap::new();
3030        let mut usage: Option<Usage> = None;
3031        let mut stop_reason: Option<StopReason> = None;
3032
3033        // Chunk 1: text delta + finish_reason — must NOT finalize.
3034        let o1 = step_completion_stream(
3035            r#"{"choices":[{"delta":{"content":"hi"},"finish_reason":"stop"}]}"#,
3036            &mut tool_calls,
3037            &mut usage,
3038            &mut stop_reason,
3039        );
3040        assert!(o1.terminal.is_none());
3041        assert!(matches!(stop_reason, Some(StopReason::EndTurn)));
3042
3043        // Chunk 2: usage-only trailing chunk (choices: []).
3044        let o2 = step_completion_stream(
3045            r#"{"choices":[],"usage":{"prompt_tokens":10,"completion_tokens":5}}"#,
3046            &mut tool_calls,
3047            &mut usage,
3048            &mut stop_reason,
3049        );
3050        assert!(o2.terminal.is_none());
3051
3052        // Chunk 3: [DONE] sentinel finalizes and must carry the trailing usage.
3053        let o3 = step_completion_stream("[DONE]", &mut tool_calls, &mut usage, &mut stop_reason);
3054        let terminal = o3.terminal.expect("[DONE] finalizes the stream");
3055        assert!(terminal.iter().any(|d| matches!(
3056            d,
3057            StreamDelta::Usage(Usage {
3058                input_tokens: 10,
3059                output_tokens: 5,
3060                ..
3061            })
3062        )));
3063        assert!(terminal.iter().any(|d| matches!(
3064            d,
3065            StreamDelta::Done {
3066                stop_reason: Some(StopReason::EndTurn)
3067            }
3068        )));
3069    }
3070
3071    #[test]
3072    fn test_response_format_omitted_when_absent() {
3073        let messages = vec![ApiMessage {
3074            role: ApiRole::User,
3075            content: Some("Hello".to_string()),
3076            reasoning_content: None,
3077            tool_calls: None,
3078            tool_call_id: None,
3079        }];
3080
3081        let request = ApiChatRequest {
3082            model: "gpt-4o",
3083            messages: &messages,
3084            max_completion_tokens: Some(1024),
3085            max_tokens: None,
3086            tools: None,
3087            tool_choice: None,
3088            reasoning: None,
3089            response_format: None,
3090        };
3091
3092        let json = serde_json::to_string(&request).unwrap();
3093        assert!(!json.contains("response_format"));
3094    }
3095}