construct/providers/compatible.rs

//! Generic OpenAI-compatible provider.
//! Most LLM APIs follow the same `/v1/chat/completions` format.
//! This module provides a single implementation that works for all of them.

use crate::multimodal;
use crate::providers::traits::{
    ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse,
    Provider, StreamChunk, StreamError, StreamEvent, StreamOptions, StreamResult, TokenUsage,
    ToolCall as ProviderToolCall,
};
use async_trait::async_trait;
use futures_util::{StreamExt, stream};
use reqwest::{
    Client,
    header::{HeaderMap, HeaderValue, USER_AGENT},
};
use serde::{Deserialize, Serialize};

/// A provider that speaks the OpenAI-compatible chat completions API.
/// Used by: Venice, Vercel AI Gateway, Cloudflare AI Gateway, Moonshot,
/// Synthetic, `OpenCode` Zen, `OpenCode` Go, `Z.AI`, `GLM`, `MiniMax`, Bedrock, Qianfan, Groq, Mistral, `xAI`, etc.
#[allow(clippy::struct_excessive_bools)]
pub struct OpenAiCompatibleProvider {
    pub(crate) name: String,
    pub(crate) base_url: String,
    pub(crate) credential: Option<String>,
    pub(crate) auth_header: AuthStyle,
    supports_vision: bool,
    /// When false, do not fall back to /v1/responses on chat completions 404.
    /// GLM/Zhipu does not support the responses API.
    supports_responses_fallback: bool,
    user_agent: Option<String>,
    /// When true, collect all `system` messages and prepend their content
    /// to the first `user` message, then drop the system messages.
    /// Required for providers that reject `role: system` (e.g. MiniMax).
    merge_system_into_user: bool,
    /// Whether this provider supports OpenAI-style native tool calling.
    /// When false, tools are injected into the system prompt as text.
    native_tool_calling: bool,
    /// HTTP request timeout in seconds for LLM API calls. Default: 120.
    timeout_secs: u64,
    /// Extra HTTP headers to include in all API requests.
    extra_headers: std::collections::HashMap<String, String>,
    /// Optional reasoning effort for GPT-5/Codex-compatible backends.
    reasoning_effort: Option<String>,
    /// Custom API path suffix (e.g. "/v2/generate").
    /// When set, overrides the default `/chat/completions` path detection.
    api_path: Option<String>,
    /// Maximum output tokens to include in API requests.
    max_tokens: Option<u32>,
}

/// How the provider expects the API key to be sent.
#[derive(Debug, Clone)]
pub enum AuthStyle {
    /// `Authorization: Bearer <key>`
    Bearer,
    /// `x-api-key: <key>` (used by some Chinese providers)
    XApiKey,
    /// Custom header name
    Custom(String),
}
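
// Illustrative sketch (not part of the provider wiring): constructing a
// Bearer-auth provider. The "groq" name, URL, and key below are assumed
// example values, not configuration shipped by this module.
#[cfg(test)]
mod auth_style_usage_sketch {
    use super::*;

    #[test]
    fn bearer_provider_trims_trailing_slash_from_base_url() {
        let provider = OpenAiCompatibleProvider::new(
            "groq",
            "https://api.groq.com/openai/v1/",
            Some("example-key"),
            AuthStyle::Bearer,
        );
        assert_eq!(provider.name, "groq");
        // `new_with_options` trims the trailing slash before storing the URL.
        assert_eq!(provider.base_url, "https://api.groq.com/openai/v1");
    }
}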

impl OpenAiCompatibleProvider {
    pub fn new(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, true, None, false,
        )
    }

    pub fn new_with_vision(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        supports_vision: bool,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            supports_vision,
            true,
            None,
            false,
        )
    }

    /// Same as `new` but skips the /v1/responses fallback on 404.
    /// Use for providers (e.g. GLM) that only support chat completions.
    pub fn new_no_responses_fallback(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, false, None, false,
        )
    }

    /// Create a provider with a custom User-Agent header.
    ///
    /// Some providers (for example Kimi Code) require a specific User-Agent
    /// for request routing and policy enforcement.
    pub fn new_with_user_agent(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        user_agent: &str,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            false,
            true,
            Some(user_agent),
            false,
        )
    }

    pub fn new_with_user_agent_and_vision(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        user_agent: &str,
        supports_vision: bool,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            supports_vision,
            true,
            Some(user_agent),
            false,
        )
    }

    /// For providers that do not support `role: system` (e.g. MiniMax).
    /// System prompt content is prepended to the first user message instead.
    pub fn new_merge_system_into_user(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, false, None, true,
        )
    }

    fn new_with_options(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        supports_vision: bool,
        supports_responses_fallback: bool,
        user_agent: Option<&str>,
        merge_system_into_user: bool,
    ) -> Self {
        Self {
            name: name.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
            credential: credential.map(ToString::to_string),
            auth_header: auth_style,
            supports_vision,
            supports_responses_fallback,
            user_agent: user_agent.map(ToString::to_string),
            merge_system_into_user,
            native_tool_calling: !merge_system_into_user,
            timeout_secs: 120,
            extra_headers: std::collections::HashMap::new(),
            reasoning_effort: None,
            api_path: None,
            max_tokens: None,
        }
    }

    /// Disable native tool calling, forcing prompt-guided tool use instead.
    pub fn without_native_tools(mut self) -> Self {
        self.native_tool_calling = false;
        self
    }

    /// Override the HTTP request timeout for LLM API calls.
    pub fn with_timeout_secs(mut self, timeout_secs: u64) -> Self {
        self.timeout_secs = timeout_secs;
        self
    }

    /// Set extra HTTP headers to include in all API requests.
    pub fn with_extra_headers(
        mut self,
        headers: std::collections::HashMap<String, String>,
    ) -> Self {
        self.extra_headers = headers;
        self
    }

    /// Set reasoning effort for GPT-5/Codex-compatible chat-completions APIs.
    pub fn with_reasoning_effort(mut self, reasoning_effort: Option<String>) -> Self {
        self.reasoning_effort = reasoning_effort;
        self
    }

    /// Set a custom API path suffix for this provider.
    /// When set, replaces the default `/chat/completions` path.
    pub fn with_api_path(mut self, api_path: Option<String>) -> Self {
        self.api_path = api_path;
        self
    }

    /// Set the maximum output tokens for API requests.
    pub fn with_max_tokens(mut self, max_tokens: Option<u32>) -> Self {
        self.max_tokens = max_tokens;
        self
    }

    /// Collect all `system` role messages, concatenate their content,
    /// and prepend to the first `user` message. Drop all system messages.
    /// Used for providers (e.g. MiniMax) that reject `role: system`.
    fn flatten_system_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
        let system_content: String = messages
            .iter()
            .filter(|m| m.role == "system")
            .map(|m| m.content.as_str())
            .collect::<Vec<_>>()
            .join("\n\n");

        if system_content.is_empty() {
            return messages.to_vec();
        }

        let mut result: Vec<ChatMessage> = messages
            .iter()
            .filter(|m| m.role != "system")
            .cloned()
            .collect();

        if let Some(first_user) = result.iter_mut().find(|m| m.role == "user") {
            first_user.content = format!("{system_content}\n\n{}", first_user.content);
        } else {
            // No user message found: insert a synthetic user message with system content
            result.insert(0, ChatMessage::user(&system_content));
        }

        result
    }

    fn http_client(&self) -> Client {
        let timeout = self.timeout_secs;
        let has_user_agent = self.user_agent.is_some();
        let has_extra_headers = !self.extra_headers.is_empty();

        if has_user_agent || has_extra_headers {
            let mut headers = HeaderMap::new();
            if let Some(ua) = self.user_agent.as_deref() {
                if let Ok(value) = HeaderValue::from_str(ua) {
                    headers.insert(USER_AGENT, value);
                }
            }
            for (key, value) in &self.extra_headers {
                match (
                    reqwest::header::HeaderName::from_bytes(key.as_bytes()),
                    HeaderValue::from_str(value),
                ) {
                    (Ok(name), Ok(val)) => {
                        headers.insert(name, val);
                    }
                    _ => {
                        tracing::warn!(header = key, "Skipping invalid extra header name or value");
                    }
                }
            }

            let builder = Client::builder()
                .timeout(std::time::Duration::from_secs(timeout))
                .connect_timeout(std::time::Duration::from_secs(10))
                .default_headers(headers);
            let builder =
                crate::config::apply_runtime_proxy_to_builder(builder, "provider.compatible");

            return builder.build().unwrap_or_else(|error| {
                tracing::warn!(
                    "Failed to build proxied timeout client with custom headers: {error}"
                );
                Client::new()
            });
        }

        crate::config::build_runtime_proxy_client_with_timeouts("provider.compatible", timeout, 10)
    }

    /// Build the full URL for chat completions, detecting if base_url already includes the path.
    /// This allows custom providers with non-standard endpoints (e.g., VolcEngine ARK uses
    /// `/api/coding/v3/chat/completions` instead of `/v1/chat/completions`).
    fn chat_completions_url(&self) -> String {
        // If a custom api_path is configured, use it directly.
        if let Some(ref api_path) = self.api_path {
            let separator = if api_path.starts_with('/') { "" } else { "/" };
            return format!("{}{separator}{api_path}", self.base_url);
        }

        let has_full_endpoint = reqwest::Url::parse(&self.base_url)
            .map(|url| {
                url.path()
                    .trim_end_matches('/')
                    .ends_with("/chat/completions")
            })
            .unwrap_or_else(|_| {
                self.base_url
                    .trim_end_matches('/')
                    .ends_with("/chat/completions")
            });

        if has_full_endpoint {
            self.base_url.clone()
        } else {
            format!("{}/chat/completions", self.base_url)
        }
    }

    fn path_ends_with(&self, suffix: &str) -> bool {
        if let Ok(url) = reqwest::Url::parse(&self.base_url) {
            return url.path().trim_end_matches('/').ends_with(suffix);
        }

        self.base_url.trim_end_matches('/').ends_with(suffix)
    }

    fn has_explicit_api_path(&self) -> bool {
        let Ok(url) = reqwest::Url::parse(&self.base_url) else {
            return false;
        };

        let path = url.path().trim_end_matches('/');
        !path.is_empty() && path != "/"
    }

    fn requires_tool_stream(&self) -> bool {
        let host_requires_tool_stream = reqwest::Url::parse(&self.base_url)
            .ok()
            .and_then(|url| url.host_str().map(str::to_ascii_lowercase))
            .is_some_and(|host| host == "api.z.ai" || host.ends_with(".z.ai"));

        host_requires_tool_stream || matches!(self.name.as_str(), "zai" | "z.ai")
    }

    fn tool_stream_for_tools(&self, has_tools: bool) -> Option<bool> {
        if has_tools && self.requires_tool_stream() {
            Some(true)
        } else {
            None
        }
    }

    /// Build the full URL for responses API, detecting if base_url already includes the path.
    fn responses_url(&self) -> String {
        if self.path_ends_with("/responses") {
            return self.base_url.clone();
        }

        let normalized_base = self.base_url.trim_end_matches('/');

        // If chat endpoint is explicitly configured, derive sibling responses endpoint.
        if let Some(prefix) = normalized_base.strip_suffix("/chat/completions") {
            return format!("{prefix}/responses");
        }

        // If an explicit API path already exists (e.g. /v1, /openai, /api/coding/v3),
        // append responses directly to avoid duplicate /v1 segments.
        if self.has_explicit_api_path() {
            format!("{normalized_base}/responses")
        } else {
            format!("{normalized_base}/v1/responses")
        }
    }

    fn tool_specs_to_openai_format(tools: &[crate::tools::ToolSpec]) -> Vec<serde_json::Value> {
        tools
            .iter()
            .map(|tool| {
                serde_json::json!({
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": tool.description,
                        "parameters": tool.parameters
                    }
                })
            })
            .collect()
    }

    fn reasoning_effort_for_model(&self, model: &str) -> Option<String> {
        let id = model.rsplit('/').next().unwrap_or(model);
        let supports_reasoning_effort = id.starts_with("gpt-5") || id.contains("codex");
        supports_reasoning_effort
            .then(|| self.reasoning_effort.clone())
            .flatten()
    }
}
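
// Illustrative sketch of the URL detection and system-message flattening
// described above. The hosts, paths, keys, and message strings are assumed
// example values.
#[cfg(test)]
mod endpoint_and_flatten_sketch {
    use super::*;

    #[test]
    fn appends_chat_completions_unless_base_url_already_has_it() {
        let plain = OpenAiCompatibleProvider::new(
            "example",
            "https://api.example.com/v1",
            Some("example-key"),
            AuthStyle::Bearer,
        );
        assert_eq!(
            plain.chat_completions_url(),
            "https://api.example.com/v1/chat/completions"
        );
        // An explicit /v1 path means the sibling endpoint is appended directly.
        assert_eq!(plain.responses_url(), "https://api.example.com/v1/responses");

        let explicit = OpenAiCompatibleProvider::new(
            "ark-style",
            "https://api.example.com/api/coding/v3/chat/completions",
            Some("example-key"),
            AuthStyle::Bearer,
        );
        assert_eq!(explicit.chat_completions_url(), explicit.base_url);
        assert_eq!(
            explicit.responses_url(),
            "https://api.example.com/api/coding/v3/responses"
        );
    }

    #[test]
    fn flattening_prepends_system_content_to_first_user_message() {
        let flattened = OpenAiCompatibleProvider::flatten_system_messages(&[
            ChatMessage::system("You are helpful.".to_string()),
            ChatMessage::user("Hello"),
        ]);
        assert_eq!(flattened.len(), 1);
        assert_eq!(flattened[0].role, "user");
        assert!(flattened[0].content.starts_with("You are helpful."));
    }
}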

#[derive(Debug, Serialize)]
struct ApiChatRequest {
    model: String,
    messages: Vec<Message>,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<StreamOptionsPayload>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}

/// OpenAI chat-completions `stream_options` payload. Opting in to
/// `include_usage` instructs the server to emit a final chunk carrying the
/// token usage after the text stream completes.
#[derive(Debug, Serialize)]
struct StreamOptionsPayload {
    include_usage: bool,
}

#[derive(Debug, Serialize)]
struct Message {
    role: String,
    content: MessageContent,
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
enum MessageContent {
    Text(String),
    Parts(Vec<MessagePart>),
}

#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum MessagePart {
    Text { text: String },
    ImageUrl { image_url: ImageUrlPart },
}

#[derive(Debug, Serialize)]
struct ImageUrlPart {
    url: String,
}

#[derive(Debug, Deserialize)]
struct ApiChatResponse {
    choices: Vec<Choice>,
    #[serde(default)]
    usage: Option<UsageInfo>,
}

#[derive(Debug, Deserialize)]
struct UsageInfo {
    #[serde(default)]
    prompt_tokens: Option<u64>,
    #[serde(default)]
    completion_tokens: Option<u64>,
}

#[derive(Debug, Deserialize)]
struct Choice {
    message: ResponseMessage,
}

/// Remove `<think>...</think>` blocks from model output.
/// Some reasoning models (e.g. MiniMax) embed their chain-of-thought inline
/// in the `content` field rather than a separate `reasoning_content` field.
/// The resulting `<think>` tags must be stripped before returning to the user.
fn strip_think_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;
    loop {
        if let Some(start) = rest.find("<think>") {
            result.push_str(&rest[..start]);
            if let Some(end) = rest[start..].find("</think>") {
                rest = &rest[start + end + "</think>".len()..];
            } else {
                // Unclosed tag: drop the rest to avoid leaking partial reasoning.
                break;
            }
        } else {
            result.push_str(rest);
            break;
        }
    }
    result.trim().to_string()
}
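
// Small sketch of the stripping behavior documented above; the input strings
// are assumed examples.
#[cfg(test)]
mod strip_think_tags_sketch {
    use super::*;

    #[test]
    fn removes_closed_blocks_and_drops_unclosed_ones() {
        assert_eq!(strip_think_tags("<think>internal</think>Hello"), "Hello");
        // An unclosed tag drops the remainder instead of leaking reasoning.
        assert_eq!(strip_think_tags("Hi<think>partial reasoning"), "Hi");
    }
}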

#[derive(Debug, Deserialize, Serialize)]
struct ResponseMessage {
    #[serde(default)]
    content: Option<String>,
    /// Reasoning/thinking models (e.g. Qwen3, GLM-4) may return their output
    /// in `reasoning_content` instead of `content`. Used as automatic fallback.
    #[serde(default)]
    reasoning_content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<ToolCall>>,
}

impl ResponseMessage {
    /// Extract text content, falling back to `reasoning_content` when `content`
    /// is missing or empty. Reasoning/thinking models (Qwen3, GLM-4, etc.)
    /// often return their output solely in `reasoning_content`.
    /// Strips `<think>...</think>` blocks that some models (e.g. MiniMax) embed
    /// inline in `content` instead of using a separate field.
    fn effective_content(&self) -> String {
        if let Some(content) = self.content.as_ref().filter(|c| !c.is_empty()) {
            let stripped = strip_think_tags(content);
            if !stripped.is_empty() {
                return stripped;
            }
        }

        self.reasoning_content
            .as_ref()
            .map(|c| strip_think_tags(c))
            .filter(|c| !c.is_empty())
            .unwrap_or_default()
    }

    fn effective_content_optional(&self) -> Option<String> {
        if let Some(content) = self.content.as_ref().filter(|c| !c.is_empty()) {
            let stripped = strip_think_tags(content);
            if !stripped.is_empty() {
                return Some(stripped);
            }
        }

        self.reasoning_content
            .as_ref()
            .map(|c| strip_think_tags(c))
            .filter(|c| !c.is_empty())
    }
}
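
// Sketch of the `reasoning_content` fallback described above, using an
// assumed model reply shape.
#[cfg(test)]
mod effective_content_sketch {
    use super::*;

    #[test]
    fn falls_back_to_reasoning_content_when_content_is_empty() {
        let message = ResponseMessage {
            content: Some(String::new()),
            reasoning_content: Some("<think>why</think>The answer is 4".to_string()),
            tool_calls: None,
        };
        assert_eq!(message.effective_content(), "The answer is 4");
        assert_eq!(
            message.effective_content_optional().as_deref(),
            Some("The answer is 4")
        );
    }
}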

#[derive(Debug, Deserialize, Serialize)]
struct ToolCall {
    #[serde(skip_serializing_if = "Option::is_none")]
    id: Option<String>,
    #[serde(rename = "type")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    function: Option<Function>,

    // Compatibility: Some providers (e.g., older GLM) may use 'name' directly
    #[serde(default, skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    arguments: Option<String>,

    // Compatibility: DeepSeek sometimes wraps arguments differently
    #[serde(
        rename = "parameters",
        default,
        skip_serializing_if = "Option::is_none"
    )]
    parameters: Option<serde_json::Value>,
}

impl ToolCall {
    /// Extract function name with fallback logic for various provider formats
    fn function_name(&self) -> Option<String> {
        // Standard OpenAI format: tool_calls[].function.name
        if let Some(ref func) = self.function {
            if let Some(ref name) = func.name {
                return Some(name.clone());
            }
        }
        // Fallback: direct name field
        self.name.clone()
    }

    /// Extract arguments with fallback logic and type conversion
    fn function_arguments(&self) -> Option<String> {
        // Standard OpenAI format: tool_calls[].function.arguments (string)
        if let Some(ref func) = self.function {
            if let Some(ref args) = func.arguments {
                return Some(args.clone());
            }
        }
        // Fallback: direct arguments field
        if let Some(ref args) = self.arguments {
            return Some(args.clone());
        }
        // Compatibility: Some providers return parameters as object instead of string
        if let Some(ref params) = self.parameters {
            return serde_json::to_string(params).ok();
        }
        None
    }
}
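
// Sketch of the compatibility fallbacks above: a provider that returns
// `name` and `parameters` at the top level instead of a nested `function`
// object. The tool name and path are assumed examples.
#[cfg(test)]
mod tool_call_fallback_sketch {
    use super::*;

    #[test]
    fn reads_top_level_name_and_serializes_object_parameters() {
        let call = ToolCall {
            id: Some("call_0".to_string()),
            kind: None,
            function: None,
            name: Some("read_file".to_string()),
            arguments: None,
            parameters: Some(serde_json::json!({"path": "Cargo.toml"})),
        };
        assert_eq!(call.function_name().as_deref(), Some("read_file"));
        assert_eq!(
            call.function_arguments().as_deref(),
            Some(r#"{"path":"Cargo.toml"}"#)
        );
    }
}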

#[derive(Debug, Deserialize, Serialize)]
struct Function {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Serialize)]
struct NativeChatRequest {
    model: String,
    messages: Vec<NativeMessage>,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<StreamOptionsPayload>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}

#[derive(Debug, Serialize)]
struct NativeMessage {
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<MessageContent>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ToolCall>>,
    /// Raw reasoning content from thinking models; pass-through for providers
    /// that require it in assistant tool-call history messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_content: Option<String>,
}

#[derive(Debug, Serialize)]
struct ResponsesRequest {
    model: String,
    input: Vec<ResponsesInput>,
    #[serde(skip_serializing_if = "Option::is_none")]
    instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
}

#[derive(Debug, Serialize)]
struct ResponsesInput {
    role: String,
    content: ResponsesInputContent,
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    kind: Option<String>,
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
enum ResponsesInputContent {
    Text(String),
    Parts(Vec<ResponsesInputPart>),
}

#[derive(Debug, Serialize)]
struct ResponsesInputPart {
    #[serde(rename = "type")]
    kind: String,
    text: String,
}

impl ResponsesInput {
    fn user_text(content: String) -> Self {
        Self {
            role: "user".to_string(),
            content: ResponsesInputContent::Text(content),
            kind: None,
        }
    }

    fn assistant_output_text(content: String) -> Self {
        Self {
            role: "assistant".to_string(),
            content: ResponsesInputContent::Parts(vec![ResponsesInputPart {
                kind: "output_text".to_string(),
                text: content,
            }]),
            kind: Some("message".to_string()),
        }
    }
}

#[derive(Debug, Deserialize)]
struct ResponsesResponse {
    #[serde(default)]
    output: Vec<ResponsesOutput>,
    #[serde(default)]
    output_text: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ResponsesOutput {
    #[serde(default)]
    content: Vec<ResponsesContent>,
}

#[derive(Debug, Deserialize)]
struct ResponsesContent {
    #[serde(rename = "type")]
    kind: Option<String>,
    text: Option<String>,
}

// ---------------------------------------------------------------
// Streaming support (SSE parser)
// ---------------------------------------------------------------

/// Server-Sent Event stream chunk for OpenAI-compatible streaming.
#[derive(Debug, Deserialize)]
struct StreamChunkResponse {
    #[serde(default)]
    choices: Vec<StreamChoice>,
    /// Final usage chunk from servers that honor `stream_options.include_usage`.
    /// Earlier chunks have `choices` populated and `usage` null; the trailing
    /// usage chunk flips this — `choices` is empty and `usage` carries totals.
    #[serde(default)]
    usage: Option<StreamUsageInfo>,
}

#[derive(Debug, Deserialize)]
struct StreamUsageInfo {
    #[serde(default)]
    prompt_tokens: Option<u64>,
    #[serde(default)]
    completion_tokens: Option<u64>,
    #[serde(default)]
    prompt_tokens_details: Option<StreamPromptTokensDetails>,
}

#[derive(Debug, Deserialize)]
struct StreamPromptTokensDetails {
    #[serde(default)]
    cached_tokens: Option<u64>,
}

#[derive(Debug, Deserialize)]
struct StreamChoice {
    #[serde(default)]
    delta: StreamDelta,
    #[serde(default)]
    finish_reason: Option<String>,
}

#[derive(Debug, Deserialize, Default)]
struct StreamDelta {
    #[serde(default)]
    content: Option<String>,
    /// Reasoning/thinking models may stream output via `reasoning_content`.
    #[serde(default)]
    reasoning_content: Option<String>,
    /// Native tool-calling deltas in OpenAI chat-completions streaming format.
    #[serde(default)]
    tool_calls: Option<Vec<StreamToolCallDelta>>,
}

#[derive(Debug, Deserialize)]
struct StreamToolCallDelta {
    #[serde(default)]
    index: Option<usize>,
    #[serde(default)]
    id: Option<String>,
    #[serde(default)]
    function: Option<StreamFunctionDelta>,
    // Compatibility: some providers stream name/arguments at top-level.
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Deserialize)]
struct StreamFunctionDelta {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Default)]
struct StreamToolCallAccumulator {
    id: Option<String>,
    name: Option<String>,
    arguments: String,
}

impl StreamToolCallAccumulator {
    fn apply_delta(&mut self, delta: &StreamToolCallDelta) {
        if let Some(id) = delta.id.as_ref().filter(|value| !value.is_empty()) {
            self.id = Some(id.clone());
        }

        let delta_name = delta
            .function
            .as_ref()
            .and_then(|function| function.name.as_ref())
            .or(delta.name.as_ref())
            .filter(|value| !value.is_empty());
        if let Some(name) = delta_name {
            self.name = Some(name.clone());
        }

        if let Some(arguments_delta) = delta
            .function
            .as_ref()
            .and_then(|function| function.arguments.as_ref())
            .or(delta.arguments.as_ref())
            .filter(|value| !value.is_empty())
        {
            self.arguments.push_str(arguments_delta);
        }
    }

    fn into_provider_tool_call(self) -> Option<ProviderToolCall> {
        let name = self.name?;
        let arguments = if self.arguments.trim().is_empty() {
            "{}".to_string()
        } else {
            self.arguments
        };
        let normalized_arguments = if serde_json::from_str::<serde_json::Value>(&arguments).is_ok()
        {
            arguments
        } else {
            tracing::warn!(
                function = %name,
                arguments = %arguments,
                "Invalid JSON in streamed native tool-call arguments, using empty object"
            );
            "{}".to_string()
        };

        Some(ProviderToolCall {
            id: self.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
            name,
            arguments: normalized_arguments,
        })
    }
}
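
// Sketch of how streamed tool-call fragments are accumulated into a single
// `ProviderToolCall`; the function name and argument fragments are assumed
// examples.
#[cfg(test)]
mod stream_tool_call_accumulator_sketch {
    use super::*;

    #[test]
    fn joins_argument_fragments_across_deltas() {
        let mut acc = StreamToolCallAccumulator::default();
        acc.apply_delta(&StreamToolCallDelta {
            index: Some(0),
            id: Some("call_0".to_string()),
            function: Some(StreamFunctionDelta {
                name: Some("read_file".to_string()),
                arguments: Some(r#"{"path":"#.to_string()),
            }),
            name: None,
            arguments: None,
        });
        acc.apply_delta(&StreamToolCallDelta {
            index: Some(0),
            id: None,
            function: Some(StreamFunctionDelta {
                name: None,
                arguments: Some(r#""Cargo.toml"}"#.to_string()),
            }),
            name: None,
            arguments: None,
        });
        let call = acc.into_provider_tool_call().expect("complete tool call");
        assert_eq!(call.id, "call_0");
        assert_eq!(call.name, "read_file");
        assert_eq!(call.arguments, r#"{"path":"Cargo.toml"}"#);
    }
}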

fn parse_sse_chunk(line: &str) -> StreamResult<Option<StreamChunkResponse>> {
    let line = line.trim();

    if line.is_empty() || line.starts_with(':') {
        return Ok(None);
    }

    let Some(data) = line.strip_prefix("data:") else {
        return Ok(None);
    };
    let data = data.trim();

    if data == "[DONE]" {
        return Ok(None);
    }

    serde_json::from_str(data)
        .map(Some)
        .map_err(StreamError::Json)
}
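
// Sketch of the SSE framing handled above: a `data:` payload, a comment
// line, and the `[DONE]` sentinel. The JSON body is an assumed example.
#[cfg(test)]
mod parse_sse_chunk_sketch {
    use super::*;

    #[test]
    fn parses_data_lines_and_ignores_comments_and_done() {
        let chunk = parse_sse_chunk(r#"data: {"choices":[{"delta":{"content":"Hello"}}]}"#)
            .expect("valid JSON")
            .expect("data line produces a chunk");
        assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("Hello"));

        assert!(parse_sse_chunk(": keep-alive").expect("comment ignored").is_none());
        assert!(parse_sse_chunk("data: [DONE]").expect("sentinel ignored").is_none());
    }
}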

/// Parse custom proxy tool events from SSE lines.
/// These are emitted by proxies like claude-max-api-proxy that execute tools
/// internally and forward observability events via custom SSE fields.
fn parse_proxy_tool_event(line: &str) -> Option<StreamEvent> {
    let data = line.trim().strip_prefix("data:")?.trim();
    let obj: serde_json::Value = serde_json::from_str(data).ok()?;

    if let Some(ts) = obj.get("x_tool_start") {
        let Some(name) = ts.get("name").and_then(|v| v.as_str()) else {
            tracing::debug!("proxy x_tool_start event missing required 'name' field");
            return None;
        };
        let name = name.to_string();
        let args = ts
            .get("arguments")
            .and_then(|v| v.as_str())
            .unwrap_or("{}")
            .to_string();
        return Some(StreamEvent::PreExecutedToolCall { name, args });
    }

    if let Some(tr) = obj.get("x_tool_result") {
        let name = tr
            .get("name")
            .and_then(|v| v.as_str())
            .unwrap_or("unknown")
            .to_string();
        let output = tr
            .get("output")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        return Some(StreamEvent::PreExecutedToolResult { name, output });
    }

    None
}

fn extract_sse_text_delta(choice: &StreamChoice) -> Option<String> {
    if let Some(content) = &choice.delta.content {
        if !content.is_empty() {
            return Some(content.clone());
        }
    }

    choice
        .delta
        .reasoning_content
        .as_ref()
        .filter(|value| !value.is_empty())
        .cloned()
}

/// Parse SSE (Server-Sent Events) stream from OpenAI-compatible providers.
/// Handles the `data: {...}` format and `[DONE]` sentinel.
///
/// Returns a `StreamChunk` that distinguishes content from reasoning:
/// - Content deltas → `StreamChunk::delta`
/// - Reasoning deltas → `StreamChunk::reasoning`
fn parse_sse_line(line: &str) -> StreamResult<Option<StreamChunk>> {
    let chunk = match parse_sse_chunk(line)? {
        Some(c) => c,
        None => return Ok(None),
    };

    if let Some(choice) = chunk.choices.first() {
        if let Some(content) = &choice.delta.content {
            if !content.is_empty() {
                return Ok(Some(StreamChunk::delta(content.clone())));
            }
        }
        if let Some(reasoning) = &choice.delta.reasoning_content {
            if !reasoning.is_empty() {
                return Ok(Some(StreamChunk::reasoning(reasoning.clone())));
            }
        }
    }

    Ok(None)
}

/// Convert SSE byte stream to text chunks.
fn sse_bytes_to_chunks(
    response: reqwest::Response,
    count_tokens: bool,
) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
    let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);

    tokio::spawn(async move {
        let mut buffer = String::new();

        match response.error_for_status_ref() {
            Ok(_) => {}
            Err(e) => {
                let _ = tx.send(Err(StreamError::Http(e))).await;
                return;
            }
        }

        let mut bytes_stream = response.bytes_stream();
        // Accumulate partial UTF-8 sequences that may be split across
        // HTTP/1.1 chunked transfer boundaries (e.g. 3-byte CJK chars).
        let mut utf8_buf: Vec<u8> = Vec::new();

        while let Some(item) = bytes_stream.next().await {
            match item {
                Ok(bytes) => {
                    utf8_buf.extend_from_slice(&bytes);
                    let text = match std::str::from_utf8(&utf8_buf) {
                        Ok(s) => {
                            let owned = s.to_string();
                            utf8_buf.clear();
                            owned
                        }
                        Err(e) => {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to == 0 && utf8_buf.len() < 4 {
                                // Could still be an incomplete multi-byte char; wait for more data
                                continue;
                            }
                            let valid =
                                String::from_utf8_lossy(&utf8_buf[..valid_up_to]).into_owned();
                            utf8_buf.drain(..valid_up_to);
                            valid
                        }
                    };
                    if text.is_empty() {
                        continue;
                    }

                    buffer.push_str(&text);

                    while let Some(pos) = buffer.find('\n') {
                        let line = buffer[..pos].to_string();
                        buffer.drain(..=pos);

                        match parse_sse_line(&line) {
                            Ok(Some(chunk)) => {
                                let chunk = if count_tokens {
                                    chunk.with_token_estimate()
                                } else {
                                    chunk
                                };
                                if tx.send(Ok(chunk)).await.is_err() {
                                    return; // Receiver dropped
                                }
                            }
                            Ok(None) => {}
                            Err(e) => {
                                let _ = tx.send(Err(e)).await;
                                return;
                            }
                        }
                    }
                }
                Err(e) => {
                    let _ = tx.send(Err(StreamError::Http(e))).await;
                    return;
                }
            }
        }

        let _ = tx.send(Ok(StreamChunk::final_chunk())).await;
    });

    stream::unfold(rx, |mut rx| async {
        rx.recv().await.map(|chunk| (chunk, rx))
    })
    .boxed()
}

/// Convert SSE byte stream to structured streaming events.
fn sse_bytes_to_events(
    response: reqwest::Response,
    count_tokens: bool,
) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
    let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(100);

    tokio::spawn(async move {
        let mut buffer = String::new();
        let mut tool_calls: Vec<StreamToolCallAccumulator> = Vec::new();
        let mut emitted_tool_calls = false;

        match response.error_for_status_ref() {
            Ok(_) => {}
            Err(e) => {
                let _ = tx.send(Err(StreamError::Http(e))).await;
                return;
            }
        }

        let mut bytes_stream = response.bytes_stream();
        // Accumulate partial UTF-8 sequences split across chunk boundaries.
        let mut utf8_buf: Vec<u8> = Vec::new();
        while let Some(item) = bytes_stream.next().await {
            match item {
                Ok(bytes) => {
                    utf8_buf.extend_from_slice(&bytes);
                    let text = match std::str::from_utf8(&utf8_buf) {
                        Ok(s) => {
                            let owned = s.to_string();
                            utf8_buf.clear();
                            owned
                        }
                        Err(e) => {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to == 0 && utf8_buf.len() < 4 {
                                continue;
                            }
                            let valid =
                                String::from_utf8_lossy(&utf8_buf[..valid_up_to]).into_owned();
                            utf8_buf.drain(..valid_up_to);
                            valid
                        }
                    };
                    if text.is_empty() {
                        continue;
                    }

                    buffer.push_str(&text);

                    while let Some(pos) = buffer.find('\n') {
                        let line = buffer[..pos].to_string();
                        buffer.drain(..=pos);

                        // Custom proxy events for pre-executed tool calls
                        // (e.g. claude-max-api-proxy streaming x_tool_start/x_tool_result)
                        if let Some(event) = parse_proxy_tool_event(&line) {
                            if tx.send(Ok(event)).await.is_err() {
                                return;
                            }
                            continue;
                        }

                        let chunk = match parse_sse_chunk(&line) {
                            Ok(Some(chunk)) => chunk,
                            Ok(None) => continue,
                            Err(e) => {
                                let _ = tx.send(Err(e)).await;
                                return;
                            }
                        };

                        if let Some(usage) = chunk.usage.as_ref() {
                            let token_usage = crate::providers::traits::TokenUsage {
                                input_tokens: usage.prompt_tokens,
                                output_tokens: usage.completion_tokens,
                                cached_input_tokens: usage
                                    .prompt_tokens_details
                                    .as_ref()
                                    .and_then(|d| d.cached_tokens),
                            };
                            if tx.send(Ok(StreamEvent::Usage(token_usage))).await.is_err() {
                                return;
                            }
                        }

                        let mut should_emit_tool_calls = false;
                        for choice in &chunk.choices {
                            if let Some(text_delta) = extract_sse_text_delta(choice) {
                                let mut text_chunk = StreamChunk::delta(text_delta);
                                if count_tokens {
                                    text_chunk = text_chunk.with_token_estimate();
                                }
                                if tx
                                    .send(Ok(StreamEvent::TextDelta(text_chunk)))
                                    .await
                                    .is_err()
                                {
                                    return;
                                }
                            }

                            if let Some(deltas) = choice.delta.tool_calls.as_ref() {
                                for delta in deltas {
                                    let index = delta.index.unwrap_or(tool_calls.len());
                                    if index >= tool_calls.len() {
                                        tool_calls.resize_with(index + 1, Default::default);
                                    }
                                    if let Some(acc) = tool_calls.get_mut(index) {
                                        acc.apply_delta(delta);
                                    }
                                }
                            }

                            if choice.finish_reason.as_deref() == Some("tool_calls") {
                                should_emit_tool_calls = true;
                            }
                        }

                        if should_emit_tool_calls && !emitted_tool_calls {
                            emitted_tool_calls = true;
                            for tool_call in tool_calls
                                .drain(..)
                                .filter_map(StreamToolCallAccumulator::into_provider_tool_call)
                            {
                                if tx.send(Ok(StreamEvent::ToolCall(tool_call))).await.is_err() {
                                    return;
                                }
                            }
                        }
                    }
                }
                Err(e) => {
                    let _ = tx.send(Err(StreamError::Http(e))).await;
                    return;
                }
            }
        }

        if !emitted_tool_calls {
            for tool_call in tool_calls
                .drain(..)
                .filter_map(StreamToolCallAccumulator::into_provider_tool_call)
            {
                if tx.send(Ok(StreamEvent::ToolCall(tool_call))).await.is_err() {
                    return;
                }
            }
        }

        let _ = tx.send(Ok(StreamEvent::Final)).await;
    });

    stream::unfold(rx, |mut rx| async move {
        rx.recv().await.map(|event| (event, rx))
    })
    .boxed()
}

fn first_nonempty(text: Option<&str>) -> Option<String> {
    text.and_then(|value| {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    })
}

fn build_responses_prompt(messages: &[ChatMessage]) -> (Option<String>, Vec<ResponsesInput>) {
    let mut instructions_parts = Vec::new();
    let mut input = Vec::new();

    for message in messages {
        if message.content.trim().is_empty() {
            continue;
        }

        if message.role == "system" {
            instructions_parts.push(message.content.clone());
            continue;
        }

        let input_item = match message.role.as_str() {
            // llama.cpp Responses parser expects assistant history items in
            // "output_message" shape (`type=message`, `output_text` parts).
            "assistant" | "tool" => ResponsesInput::assistant_output_text(message.content.clone()),
            _ => ResponsesInput::user_text(message.content.clone()),
        };
        input.push(input_item);
    }

    let instructions = if instructions_parts.is_empty() {
        None
    } else {
        Some(instructions_parts.join("\n\n"))
    };

    (instructions, input)
}
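
// Sketch of the instructions/input split performed above, with assumed
// message text.
#[cfg(test)]
mod build_responses_prompt_sketch {
    use super::*;

    #[test]
    fn system_messages_become_instructions_and_users_become_input() {
        let messages = vec![
            ChatMessage::system("You are concise.".to_string()),
            ChatMessage::user("Summarize the release notes"),
        ];
        let (instructions, input) = build_responses_prompt(&messages);
        assert_eq!(instructions.as_deref(), Some("You are concise."));
        assert_eq!(input.len(), 1);
        assert_eq!(input[0].role, "user");
    }
}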
1268
1269fn extract_responses_text(response: ResponsesResponse) -> Option<String> {
1270    if let Some(text) = first_nonempty(response.output_text.as_deref()) {
1271        return Some(text);
1272    }
1273
1274    for item in &response.output {
1275        for content in &item.content {
1276            if content.kind.as_deref() == Some("output_text") {
1277                if let Some(text) = first_nonempty(content.text.as_deref()) {
1278                    return Some(text);
1279                }
1280            }
1281        }
1282    }
1283
1284    for item in &response.output {
1285        for content in &item.content {
1286            if let Some(text) = first_nonempty(content.text.as_deref()) {
1287                return Some(text);
1288            }
1289        }
1290    }
1291
1292    None
1293}
1294
1295fn compact_sanitized_body_snippet(body: &str) -> String {
1296    super::sanitize_api_error(body)
1297        .split_whitespace()
1298        .collect::<Vec<_>>()
1299        .join(" ")
1300}
1301
1302fn parse_chat_response_body(provider_name: &str, body: &str) -> anyhow::Result<ApiChatResponse> {
1303    serde_json::from_str::<ApiChatResponse>(body).map_err(|error| {
1304        let snippet = compact_sanitized_body_snippet(body);
1305        anyhow::anyhow!(
1306            "{provider_name} API returned an unexpected chat-completions payload: {error}; body={snippet}"
1307        )
1308    })
1309}
1310
1311fn parse_responses_response_body(
1312    provider_name: &str,
1313    body: &str,
1314) -> anyhow::Result<ResponsesResponse> {
1315    serde_json::from_str::<ResponsesResponse>(body).map_err(|error| {
1316        let snippet = compact_sanitized_body_snippet(body);
1317        anyhow::anyhow!(
1318            "{provider_name} Responses API returned an unexpected payload: {error}; body={snippet}"
1319        )
1320    })
1321}
1322
1323impl OpenAiCompatibleProvider {
1324    fn apply_auth_header(
1325        &self,
1326        req: reqwest::RequestBuilder,
1327        credential: &str,
1328    ) -> reqwest::RequestBuilder {
1329        match &self.auth_header {
1330            AuthStyle::Bearer => req.header("Authorization", format!("Bearer {credential}")),
1331            AuthStyle::XApiKey => req.header("x-api-key", credential),
1332            AuthStyle::Custom(header) => req.header(header, credential),
1333        }
1334    }
1335
1336    async fn chat_via_responses(
1337        &self,
1338        credential: &str,
1339        messages: &[ChatMessage],
1340        model: &str,
1341    ) -> anyhow::Result<String> {
1342        let (instructions, input) = build_responses_prompt(messages);
1343        if input.is_empty() {
1344            anyhow::bail!(
1345                "{} Responses API fallback requires at least one non-system message",
1346                self.name
1347            );
1348        }
1349
1350        let request = ResponsesRequest {
1351            model: model.to_string(),
1352            input,
1353            instructions,
1354            stream: Some(false),
1355        };
1356
1357        let url = self.responses_url();
1358
1359        let response = self
1360            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1361            .send()
1362            .await?;
1363
1364        if !response.status().is_success() {
1365            let error = response.text().await?;
1366            anyhow::bail!("{} Responses API error: {error}", self.name);
1367        }
1368
1369        let body = response.text().await?;
1370        let responses = parse_responses_response_body(&self.name, &body)?;
1371
1372        extract_responses_text(responses)
1373            .ok_or_else(|| anyhow::anyhow!("No response from {} Responses API", self.name))
1374    }
1375
1376    fn convert_tool_specs(
1377        tools: Option<&[crate::tools::ToolSpec]>,
1378    ) -> Option<Vec<serde_json::Value>> {
1379        tools.map(|items| {
1380            items
1381                .iter()
1382                .map(|tool| {
1383                    serde_json::json!({
1384                        "type": "function",
1385                        "function": {
1386                            "name": tool.name,
1387                            "description": tool.description,
1388                            "parameters": tool.parameters,
1389                        }
1390                    })
1391                })
1392                .collect()
1393        })
1394    }
1395
1396    fn to_message_content(
1397        role: &str,
1398        content: &str,
1399        allow_user_image_parts: bool,
1400    ) -> MessageContent {
1401        if role != "user" || !allow_user_image_parts {
1402            return MessageContent::Text(content.to_string());
1403        }
1404
1405        let (cleaned_text, image_refs) = multimodal::parse_image_markers(content);
1406        if image_refs.is_empty() {
1407            return MessageContent::Text(content.to_string());
1408        }
1409
1410        let mut parts = Vec::with_capacity(image_refs.len() + 1);
1411        let trimmed_text = cleaned_text.trim();
1412        if !trimmed_text.is_empty() {
1413            parts.push(MessagePart::Text {
1414                text: trimmed_text.to_string(),
1415            });
1416        }
1417
1418        for image_ref in image_refs {
1419            parts.push(MessagePart::ImageUrl {
1420                image_url: ImageUrlPart { url: image_ref },
1421            });
1422        }
1423
1424        MessageContent::Parts(parts)
1425    }
1426
1427    fn convert_messages_for_native(
1428        messages: &[ChatMessage],
1429        allow_user_image_parts: bool,
1430    ) -> Vec<NativeMessage> {
1431        messages
1432            .iter()
1433            .map(|message| {
1434                if message.role == "assistant" {
1435                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
1436                    {
1437                        if let Some(tool_calls_value) = value.get("tool_calls") {
1438                            if let Ok(parsed_calls) =
1439                                serde_json::from_value::<Vec<ProviderToolCall>>(
1440                                    tool_calls_value.clone(),
1441                                )
1442                            {
1443                                let tool_calls = parsed_calls
1444                                    .into_iter()
1445                                    .map(|tc| ToolCall {
1446                                        id: Some(tc.id),
1447                                        kind: Some("function".to_string()),
1448                                        function: Some(Function {
1449                                            name: Some(tc.name),
1450                                            arguments: Some(tc.arguments),
1451                                        }),
1452                                        name: None,
1453                                        arguments: None,
1454                                        parameters: None,
1455                                    })
1456                                    .collect::<Vec<_>>();
1457
1458                                let content = value
1459                                    .get("content")
1460                                    .and_then(serde_json::Value::as_str)
1461                                    .map(|value| MessageContent::Text(value.to_string()));
1462
1463                                let reasoning_content = value
1464                                    .get("reasoning_content")
1465                                    .and_then(serde_json::Value::as_str)
1466                                    .map(ToString::to_string);
1467
1468                                return NativeMessage {
1469                                    role: "assistant".to_string(),
1470                                    content,
1471                                    tool_call_id: None,
1472                                    tool_calls: Some(tool_calls),
1473                                    reasoning_content,
1474                                };
1475                            }
1476                        }
1477                    }
1478                }
1479
1480                if message.role == "tool" {
1481                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content) {
1482                        let tool_call_id = value
1483                            .get("tool_call_id")
1484                            .and_then(serde_json::Value::as_str)
1485                            .map(ToString::to_string);
1486                        let content = value
1487                            .get("content")
1488                            .and_then(serde_json::Value::as_str)
1489                            .map(|value| MessageContent::Text(value.to_string()))
1490                            .or_else(|| Some(MessageContent::Text(message.content.clone())));
1491
1492                        return NativeMessage {
1493                            role: "tool".to_string(),
1494                            content,
1495                            tool_call_id,
1496                            tool_calls: None,
1497                            reasoning_content: None,
1498                        };
1499                    }
1500                }
1501
1502                NativeMessage {
1503                    role: message.role.clone(),
1504                    content: Some(Self::to_message_content(
1505                        &message.role,
1506                        &message.content,
1507                        allow_user_image_parts,
1508                    )),
1509                    tool_call_id: None,
1510                    tool_calls: None,
1511                    reasoning_content: None,
1512                }
1513            })
1514            .collect()
1515    }
1516
1517    fn with_prompt_guided_tool_instructions(
1518        messages: &[ChatMessage],
1519        tools: Option<&[crate::tools::ToolSpec]>,
1520    ) -> Vec<ChatMessage> {
1521        let Some(tools) = tools else {
1522            return messages.to_vec();
1523        };
1524
1525        if tools.is_empty() {
1526            return messages.to_vec();
1527        }
1528
1529        let instructions = crate::providers::traits::build_tool_instructions_text(tools);
1530        let mut modified_messages = messages.to_vec();
1531
1532        if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") {
1533            if !system_message.content.is_empty() {
1534                system_message.content.push_str("\n\n");
1535            }
1536            system_message.content.push_str(&instructions);
1537        } else {
1538            modified_messages.insert(0, ChatMessage::system(instructions));
1539        }
1540
1541        modified_messages
1542    }
1543
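    /// Converts a native chat-completions message into a `ProviderChatResponse`.
    /// Tool calls whose `arguments` are not valid JSON are kept but normalized to
    /// `"{}"` (with a warning), and a missing call id is replaced by a fresh UUID.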
1544    fn parse_native_response(message: ResponseMessage) -> ProviderChatResponse {
1545        let text = message.effective_content_optional();
1546        let reasoning_content = message.reasoning_content.clone();
1547        let tool_calls = message
1548            .tool_calls
1549            .unwrap_or_default()
1550            .into_iter()
1551            .filter_map(|tc| {
1552                let name = tc.function_name()?;
1553                let arguments = tc.function_arguments().unwrap_or_else(|| "{}".to_string());
1554                let normalized_arguments =
1555                    if serde_json::from_str::<serde_json::Value>(&arguments).is_ok() {
1556                        arguments
1557                    } else {
1558                        tracing::warn!(
1559                            function = %name,
1560                            arguments = %arguments,
1561                            "Invalid JSON in native tool-call arguments, using empty object"
1562                        );
1563                        "{}".to_string()
1564                    };
1565                Some(ProviderToolCall {
1566                    id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
1567                    name,
1568                    arguments: normalized_arguments,
1569                })
1570            })
1571            .collect::<Vec<_>>();
1572
1573        ProviderChatResponse {
1574            text,
1575            tool_calls,
1576            usage: None,
1577            reasoning_content,
1578        }
1579    }
1580
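    /// Heuristic used by `chat`: a 400/422 response whose body mentions one of the
    /// known "tools not supported" hints is treated as a backend that rejects the
    /// native tool schema, which triggers the prompt-guided fallback above.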
1581    fn is_native_tool_schema_unsupported(status: reqwest::StatusCode, error: &str) -> bool {
1582        if !matches!(
1583            status,
1584            reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY
1585        ) {
1586            return false;
1587        }
1588
1589        let lower = error.to_lowercase();
1590        [
1591            "unknown parameter: tools",
1592            "unsupported parameter: tools",
1593            "unrecognized field `tools`",
1594            "does not support tools",
1595            "function calling is not supported",
1596            "tool_choice",
1597            "tool call validation failed",
1598            "was not in request",
1599        ]
1600        .iter()
1601        .any(|hint| lower.contains(hint))
1602    }
1603}
1604
1605#[async_trait]
1606impl Provider for OpenAiCompatibleProvider {
1607    fn capabilities(&self) -> crate::providers::traits::ProviderCapabilities {
1608        crate::providers::traits::ProviderCapabilities {
1609            native_tool_calling: self.native_tool_calling,
1610            vision: self.supports_vision,
1611            prompt_caching: false,
1612        }
1613    }
1614
1615    async fn chat_with_system(
1616        &self,
1617        system_prompt: Option<&str>,
1618        message: &str,
1619        model: &str,
1620        temperature: f64,
1621    ) -> anyhow::Result<String> {
1622        let credential = self.credential.as_ref().ok_or_else(|| {
1623            anyhow::anyhow!(
1624                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1625                self.name
1626            )
1627        })?;
1628
1629        let mut messages = Vec::new();
1630
1631        if self.merge_system_into_user {
1632            let content = match system_prompt {
1633                Some(sys) => format!("{sys}\n\n{message}"),
1634                None => message.to_string(),
1635            };
1636            messages.push(Message {
1637                role: "user".to_string(),
1638                content: Self::to_message_content("user", &content, !self.merge_system_into_user),
1639            });
1640        } else {
1641            if let Some(sys) = system_prompt {
1642                messages.push(Message {
1643                    role: "system".to_string(),
1644                    content: MessageContent::Text(sys.to_string()),
1645                });
1646            }
1647            messages.push(Message {
1648                role: "user".to_string(),
1649                content: Self::to_message_content("user", message, true),
1650            });
1651        }
1652
1653        let request = ApiChatRequest {
1654            model: model.to_string(),
1655            messages,
1656            temperature,
1657            stream: Some(false),
1658            stream_options: None,
1659            reasoning_effort: self.reasoning_effort_for_model(model),
1660            tool_stream: None,
1661            tools: None,
1662            tool_choice: None,
1663            max_tokens: self.max_tokens,
1664        };
1665
1666        let url = self.chat_completions_url();
1667
1668        let mut fallback_messages = Vec::new();
1669        if let Some(system_prompt) = system_prompt {
1670            fallback_messages.push(ChatMessage::system(system_prompt));
1671        }
1672        fallback_messages.push(ChatMessage::user(message));
1673        let fallback_messages = if self.merge_system_into_user {
1674            Self::flatten_system_messages(&fallback_messages)
1675        } else {
1676            fallback_messages
1677        };
1678
1679        let response = match self
1680            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1681            .send()
1682            .await
1683        {
1684            Ok(response) => response,
1685            Err(chat_error) => {
1686                if self.supports_responses_fallback {
1687                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
1688                    return self
1689                        .chat_via_responses(credential, &fallback_messages, model)
1690                        .await
1691                        .map_err(|responses_err| {
1692                            anyhow::anyhow!(
1693                                "{} chat completions transport error: {sanitized} (responses fallback failed: {responses_err})",
1694                                self.name
1695                            )
1696                        });
1697                }
1698
1699                return Err(chat_error.into());
1700            }
1701        };
1702
1703        if !response.status().is_success() {
1704            let status = response.status();
1705            let error = response.text().await?;
1706            let sanitized = super::sanitize_api_error(&error);
1707
1708            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
1709                return self
1710                    .chat_via_responses(credential, &fallback_messages, model)
1711                    .await
1712                    .map_err(|responses_err| {
1713                        anyhow::anyhow!(
1714                            "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {responses_err})",
1715                            self.name
1716                        )
1717                    });
1718            }
1719
1720            anyhow::bail!("{} API error ({status}): {sanitized}", self.name);
1721        }
1722
1723        let body = response.text().await?;
1724        let chat_response = parse_chat_response_body(&self.name, &body)?;
1725
1726        chat_response
1727            .choices
1728            .into_iter()
1729            .next()
1730            .map(|c| {
1731                // If tool_calls are present, serialize the full message as JSON
1732                // so parse_tool_calls can handle the OpenAI-style format
                if c.message
                    .tool_calls
                    .as_ref()
                    .is_some_and(|t| !t.is_empty())
                {
1739                    serde_json::to_string(&c.message)
1740                        .unwrap_or_else(|_| c.message.effective_content())
1741                } else {
1742                    // No tool calls, return content (with reasoning_content fallback)
1743                    c.message.effective_content()
1744                }
1745            })
1746            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
1747    }
1748
1749    async fn chat_with_history(
1750        &self,
1751        messages: &[ChatMessage],
1752        model: &str,
1753        temperature: f64,
1754    ) -> anyhow::Result<String> {
1755        let credential = self.credential.as_ref().ok_or_else(|| {
1756            anyhow::anyhow!(
1757                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1758                self.name
1759            )
1760        })?;
1761
1762        let effective_messages = if self.merge_system_into_user {
1763            Self::flatten_system_messages(messages)
1764        } else {
1765            messages.to_vec()
1766        };
1767        let api_messages: Vec<Message> = effective_messages
1768            .iter()
1769            .map(|m| Message {
1770                role: m.role.clone(),
1771                content: Self::to_message_content(
1772                    &m.role,
1773                    &m.content,
1774                    !self.merge_system_into_user,
1775                ),
1776            })
1777            .collect();
1778
1779        let request = ApiChatRequest {
1780            model: model.to_string(),
1781            messages: api_messages,
1782            temperature,
1783            stream: Some(false),
1784            stream_options: None,
1785            reasoning_effort: self.reasoning_effort_for_model(model),
1786            tool_stream: None,
1787            tools: None,
1788            tool_choice: None,
1789            max_tokens: self.max_tokens,
1790        };
1791
1792        let url = self.chat_completions_url();
1793        let response = match self
1794            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1795            .send()
1796            .await
1797        {
1798            Ok(response) => response,
1799            Err(chat_error) => {
1800                if self.supports_responses_fallback {
1801                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
1802                    return self
1803                        .chat_via_responses(credential, &effective_messages, model)
1804                        .await
1805                        .map_err(|responses_err| {
1806                            anyhow::anyhow!(
1807                                "{} chat completions transport error: {sanitized} (responses fallback failed: {responses_err})",
1808                                self.name
1809                            )
1810                        });
1811                }
1812
1813                return Err(chat_error.into());
1814            }
1815        };
1816
1817        if !response.status().is_success() {
1818            let status = response.status();
1819
1820            // Mirror chat_with_system: 404 may mean this provider uses the Responses API
1821            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
1822                return self
1823                    .chat_via_responses(credential, &effective_messages, model)
1824                    .await
1825                    .map_err(|responses_err| {
1826                        anyhow::anyhow!(
                            "{} API error ({status}): chat completions unavailable; responses fallback failed: {responses_err}",
1828                            self.name
1829                        )
1830                    });
1831            }
1832
1833            return Err(super::api_error(&self.name, response).await);
1834        }
1835
1836        let body = response.text().await?;
1837        let chat_response = parse_chat_response_body(&self.name, &body)?;
1838
1839        chat_response
1840            .choices
1841            .into_iter()
1842            .next()
1843            .map(|c| {
1844                // If tool_calls are present, serialize the full message as JSON
1845                // so parse_tool_calls can handle the OpenAI-style format
                if c.message
                    .tool_calls
                    .as_ref()
                    .is_some_and(|t| !t.is_empty())
                {
1852                    serde_json::to_string(&c.message)
1853                        .unwrap_or_else(|_| c.message.effective_content())
1854                } else {
1855                    // No tool calls, return content (with reasoning_content fallback)
1856                    c.message.effective_content()
1857                }
1858            })
1859            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
1860    }
1861
1862    async fn chat_with_tools(
1863        &self,
1864        messages: &[ChatMessage],
1865        tools: &[serde_json::Value],
1866        model: &str,
1867        temperature: f64,
1868    ) -> anyhow::Result<ProviderChatResponse> {
1869        let credential = self.credential.as_ref().ok_or_else(|| {
1870            anyhow::anyhow!(
1871                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1872                self.name
1873            )
1874        })?;
1875
1876        let effective_messages = if self.merge_system_into_user {
1877            Self::flatten_system_messages(messages)
1878        } else {
1879            messages.to_vec()
1880        };
1881        let api_messages: Vec<Message> = effective_messages
1882            .iter()
1883            .map(|m| Message {
1884                role: m.role.clone(),
1885                content: Self::to_message_content(
1886                    &m.role,
1887                    &m.content,
1888                    !self.merge_system_into_user,
1889                ),
1890            })
1891            .collect();
1892
1893        let request = ApiChatRequest {
1894            model: model.to_string(),
1895            messages: api_messages,
1896            temperature,
1897            stream: Some(false),
1898            stream_options: None,
1899            reasoning_effort: self.reasoning_effort_for_model(model),
1900            tool_stream: self.tool_stream_for_tools(!tools.is_empty()),
1901            tools: if tools.is_empty() {
1902                None
1903            } else {
1904                Some(tools.to_vec())
1905            },
1906            tool_choice: if tools.is_empty() {
1907                None
1908            } else {
1909                Some("auto".to_string())
1910            },
1911            max_tokens: self.max_tokens,
1912        };
1913
1914        let url = self.chat_completions_url();
1915        let response = match self
1916            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1917            .send()
1918            .await
1919        {
1920            Ok(response) => response,
1921            Err(error) => {
1922                tracing::warn!(
1923                    "{} native tool call transport failed: {error}; falling back to history path",
1924                    self.name
1925                );
1926                let text = self.chat_with_history(messages, model, temperature).await?;
1927                return Ok(ProviderChatResponse {
1928                    text: Some(text),
1929                    tool_calls: vec![],
1930                    usage: None,
1931                    reasoning_content: None,
1932                });
1933            }
1934        };
1935
1936        if !response.status().is_success() {
1937            return Err(super::api_error(&self.name, response).await);
1938        }
1939
1940        let body = response.text().await?;
1941        let chat_response = parse_chat_response_body(&self.name, &body)?;
1942        let usage = chat_response.usage.map(|u| TokenUsage {
1943            input_tokens: u.prompt_tokens,
1944            output_tokens: u.completion_tokens,
1945            cached_input_tokens: None,
1946        });
1947        let choice = chat_response
1948            .choices
1949            .into_iter()
1950            .next()
1951            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?;
1952
1953        let text = choice.message.effective_content_optional();
1954        let reasoning_content = choice.message.reasoning_content;
1955        let tool_calls = choice
1956            .message
1957            .tool_calls
1958            .unwrap_or_default()
1959            .into_iter()
1960            .filter_map(|tc| {
1961                let function = tc.function?;
1962                let name = function.name?;
1963                let arguments = function.arguments.unwrap_or_else(|| "{}".to_string());
1964                Some(ProviderToolCall {
                    id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
1966                    name,
1967                    arguments,
1968                })
1969            })
1970            .collect::<Vec<_>>();
1971
1972        Ok(ProviderChatResponse {
1973            text,
1974            tool_calls,
1975            usage,
1976            reasoning_content,
1977        })
1978    }
1979
1980    async fn chat(
1981        &self,
1982        request: ProviderChatRequest<'_>,
1983        model: &str,
1984        temperature: f64,
1985    ) -> anyhow::Result<ProviderChatResponse> {
1986        let credential = self.credential.as_ref().ok_or_else(|| {
1987            anyhow::anyhow!(
1988                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1989                self.name
1990            )
1991        })?;
1992
1993        let tools = Self::convert_tool_specs(request.tools);
1994        let effective_messages = if self.merge_system_into_user {
1995            Self::flatten_system_messages(request.messages)
1996        } else {
1997            request.messages.to_vec()
1998        };
1999        let native_request = NativeChatRequest {
2000            model: model.to_string(),
2001            messages: Self::convert_messages_for_native(
2002                &effective_messages,
2003                !self.merge_system_into_user,
2004            ),
2005            temperature,
2006            stream: Some(false),
2007            stream_options: None,
2008            reasoning_effort: self.reasoning_effort_for_model(model),
2009            tool_stream: self
2010                .tool_stream_for_tools(tools.as_ref().is_some_and(|tools| !tools.is_empty())),
2011            tool_choice: tools.as_ref().map(|_| "auto".to_string()),
2012            tools,
2013            max_tokens: self.max_tokens,
2014        };
2015
2016        let url = self.chat_completions_url();
2017        let response = match self
2018            .apply_auth_header(
2019                self.http_client().post(&url).json(&native_request),
2020                credential,
2021            )
2022            .send()
2023            .await
2024        {
2025            Ok(response) => response,
2026            Err(chat_error) => {
2027                if self.supports_responses_fallback {
2028                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
2029                    return self
2030                        .chat_via_responses(credential, &effective_messages, model)
2031                        .await
2032                        .map(|text| ProviderChatResponse {
2033                            text: Some(text),
2034                            tool_calls: vec![],
2035                            usage: None,
2036                            reasoning_content: None,
2037                        })
2038                        .map_err(|responses_err| {
2039                            anyhow::anyhow!(
2040                                "{} native chat transport error: {sanitized} (responses fallback failed: {responses_err})",
2041                                self.name
2042                            )
2043                        });
2044                }
2045
2046                return Err(chat_error.into());
2047            }
2048        };
2049
2050        if !response.status().is_success() {
2051            let status = response.status();
2052            let error = response.text().await?;
2053            let sanitized = super::sanitize_api_error(&error);
2054
2055            if Self::is_native_tool_schema_unsupported(status, &sanitized) {
2056                let fallback_messages =
2057                    Self::with_prompt_guided_tool_instructions(request.messages, request.tools);
2058                let text = self
2059                    .chat_with_history(&fallback_messages, model, temperature)
2060                    .await?;
2061                return Ok(ProviderChatResponse {
2062                    text: Some(text),
2063                    tool_calls: vec![],
2064                    usage: None,
2065                    reasoning_content: None,
2066                });
2067            }
2068
2069            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
2070                return self
2071                    .chat_via_responses(credential, &effective_messages, model)
2072                    .await
2073                    .map(|text| ProviderChatResponse {
2074                        text: Some(text),
2075                        tool_calls: vec![],
2076                        usage: None,
2077                        reasoning_content: None,
2078                    })
2079                    .map_err(|responses_err| {
2080                        anyhow::anyhow!(
2081                            "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {responses_err})",
2082                            self.name
2083                        )
2084                    });
2085            }
2086
2087            anyhow::bail!("{} API error ({status}): {sanitized}", self.name);
2088        }
2089
        let body = response.text().await?;
        let native_response = parse_chat_response_body(&self.name, &body)?;
2091        let usage = native_response.usage.map(|u| TokenUsage {
2092            input_tokens: u.prompt_tokens,
2093            output_tokens: u.completion_tokens,
2094            cached_input_tokens: None,
2095        });
2096        let message = native_response
2097            .choices
2098            .into_iter()
2099            .next()
2100            .map(|choice| choice.message)
2101            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?;
2102
2103        let mut result = Self::parse_native_response(message);
2104        result.usage = usage;
2105        Ok(result)
2106    }
2107
2108    fn supports_native_tools(&self) -> bool {
2109        self.native_tool_calling
2110    }
2111
2112    fn supports_streaming(&self) -> bool {
2113        true
2114    }
2115
2116    fn supports_streaming_tool_events(&self) -> bool {
2117        self.native_tool_calling
2118    }
2119
2120    fn stream_chat(
2121        &self,
2122        request: ProviderChatRequest<'_>,
2123        model: &str,
2124        temperature: f64,
2125        options: StreamOptions,
2126    ) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
2127        if !options.enabled {
2128            return stream::once(async { Ok(StreamEvent::Final) }).boxed();
2129        }
2130
2131        let credential = match self.credential.as_ref() {
2132            Some(value) => value.clone(),
2133            None => {
2134                let provider_name = self.name.clone();
2135                return stream::once(async move {
2136                    Err(StreamError::Provider(format!(
2137                        "{} API key not set",
2138                        provider_name
2139                    )))
2140                })
2141                .boxed();
2142            }
2143        };
2144
2145        let has_tools = request.tools.is_some_and(|tools| !tools.is_empty());
2146        let effective_messages = if self.merge_system_into_user {
2147            Self::flatten_system_messages(request.messages)
2148        } else {
2149            request.messages.to_vec()
2150        };
2151
2152        let tools = Self::convert_tool_specs(request.tools);
2153        let payload = if has_tools {
2154            serde_json::to_value(NativeChatRequest {
2155                model: model.to_string(),
2156                messages: Self::convert_messages_for_native(
2157                    &effective_messages,
2158                    !self.merge_system_into_user,
2159                ),
2160                temperature,
                reasoning_effort: self.reasoning_effort_for_model(model),
2162                tool_stream: if options.enabled { Some(true) } else { None },
2163                stream: Some(options.enabled),
2164                stream_options: if options.enabled {
2165                    Some(StreamOptionsPayload {
2166                        include_usage: true,
2167                    })
2168                } else {
2169                    None
2170                },
2171                tools: tools.clone(),
2172                tool_choice: tools.as_ref().map(|_| "auto".to_string()),
2173                max_tokens: self.max_tokens,
2174            })
2175        } else {
2176            let messages = effective_messages
2177                .iter()
2178                .map(|message| Message {
2179                    role: message.role.clone(),
2180                    content: Self::to_message_content(
2181                        &message.role,
2182                        &message.content,
2183                        !self.merge_system_into_user,
2184                    ),
2185                })
2186                .collect();
2187
2188            serde_json::to_value(ApiChatRequest {
2189                model: model.to_string(),
2190                messages,
2191                temperature,
                reasoning_effort: self.reasoning_effort_for_model(model),
2193                tool_stream: if options.enabled { Some(true) } else { None },
2194                stream: Some(options.enabled),
2195                stream_options: if options.enabled {
2196                    Some(StreamOptionsPayload {
2197                        include_usage: true,
2198                    })
2199                } else {
2200                    None
2201                },
2202                tools: None,
2203                tool_choice: None,
2204                max_tokens: self.max_tokens,
2205            })
2206        };
2207
2208        let payload = match payload {
2209            Ok(payload) => payload,
2210            Err(error) => {
2211                return stream::once(async move { Err(StreamError::Json(error)) }).boxed();
2212            }
2213        };
2214
2215        let url = self.chat_completions_url();
2216        let client = self.http_client();
2217        let auth_header = self.auth_header.clone();
2218        let count_tokens = options.count_tokens;
2219
2220        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(100);
2221
2222        tokio::spawn(async move {
2223            let mut req_builder = client.post(&url).json(&payload);
2224
2225            req_builder = match &auth_header {
2226                AuthStyle::Bearer => {
2227                    req_builder.header("Authorization", format!("Bearer {}", credential))
2228                }
2229                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2230                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2231            };
2232            req_builder = req_builder.header("Accept", "text/event-stream");
2233
2234            let response = match req_builder.send().await {
2235                Ok(r) => r,
2236                Err(e) => {
2237                    let _ = tx.send(Err(StreamError::Http(e))).await;
2238                    return;
2239                }
2240            };
2241
2242            if !response.status().is_success() {
2243                let status = response.status();
2244                let error = match response.text().await {
2245                    Ok(text) => text,
2246                    Err(_) => format!("HTTP error: {}", status),
2247                };
                let sanitized = super::sanitize_api_error(&error);
                let _ = tx
                    .send(Err(StreamError::Provider(format!("{status}: {sanitized}"))))
                    .await;
2251                return;
2252            }
2253
2254            let mut event_stream = sse_bytes_to_events(response, count_tokens);
2255            while let Some(event) = event_stream.next().await {
2256                if tx.send(event).await.is_err() {
2257                    break;
2258                }
2259            }
2260        });
2261
2262        stream::unfold(rx, |mut rx| async move {
2263            rx.recv().await.map(|event| (event, rx))
2264        })
2265        .boxed()
2266    }
2267
2268    fn stream_chat_with_system(
2269        &self,
2270        system_prompt: Option<&str>,
2271        message: &str,
2272        model: &str,
2273        temperature: f64,
2274        options: StreamOptions,
2275    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
2276        let credential = match self.credential.as_ref() {
2277            Some(value) => value.clone(),
2278            None => {
2279                let provider_name = self.name.clone();
2280                return stream::once(async move {
2281                    Err(StreamError::Provider(format!(
2282                        "{} API key not set",
2283                        provider_name
2284                    )))
2285                })
2286                .boxed();
2287            }
2288        };
2289
        let mut messages = Vec::new();

        if self.merge_system_into_user {
            // Mirror chat_with_system: providers that reject `role: system`
            // (e.g. MiniMax) get the system prompt folded into the user turn.
            let content = match system_prompt {
                Some(sys) => format!("{sys}\n\n{message}"),
                None => message.to_string(),
            };
            messages.push(Message {
                role: "user".to_string(),
                content: Self::to_message_content("user", &content, !self.merge_system_into_user),
            });
        } else {
            if let Some(sys) = system_prompt {
                messages.push(Message {
                    role: "system".to_string(),
                    content: MessageContent::Text(sys.to_string()),
                });
            }
            messages.push(Message {
                role: "user".to_string(),
                content: Self::to_message_content("user", message, true),
            });
        }
2301
2302        let request = ApiChatRequest {
2303            model: model.to_string(),
2304            messages,
2305            temperature,
2306            stream: Some(options.enabled),
2307            stream_options: None,
2308            reasoning_effort: self.reasoning_effort_for_model(model),
2309            tool_stream: None,
2310            tools: None,
2311            tool_choice: None,
2312            max_tokens: self.max_tokens,
2313        };
2314
2315        let url = self.chat_completions_url();
2316        let client = self.http_client();
2317        let auth_header = self.auth_header.clone();
2318
2319        // Use a channel to bridge the async HTTP response to the stream
2320        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
2321
2322        tokio::spawn(async move {
2323            // Build request with auth
2324            let mut req_builder = client.post(&url).json(&request);
2325
2326            // Apply auth header
2327            req_builder = match &auth_header {
2328                AuthStyle::Bearer => {
2329                    req_builder.header("Authorization", format!("Bearer {}", credential))
2330                }
2331                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2332                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2333            };
2334
2335            // Set accept header for streaming
2336            req_builder = req_builder.header("Accept", "text/event-stream");
2337
2338            // Send request
2339            let response = match req_builder.send().await {
2340                Ok(r) => r,
2341                Err(e) => {
2342                    let _ = tx.send(Err(StreamError::Http(e))).await;
2343                    return;
2344                }
2345            };
2346
2347            // Check status
2348            if !response.status().is_success() {
2349                let status = response.status();
2350                let error = match response.text().await {
2351                    Ok(e) => e,
2352                    Err(_) => format!("HTTP error: {}", status),
2353                };
                let sanitized = super::sanitize_api_error(&error);
                let _ = tx
                    .send(Err(StreamError::Provider(format!("{status}: {sanitized}"))))
                    .await;
2357                return;
2358            }
2359
2360            // Convert to chunk stream and forward to channel
2361            let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens);
2362            while let Some(chunk) = chunk_stream.next().await {
2363                if tx.send(chunk).await.is_err() {
2364                    break; // Receiver dropped
2365                }
2366            }
2367        });
2368
2369        // Convert channel receiver to stream
2370        stream::unfold(rx, |mut rx| async move {
2371            rx.recv().await.map(|chunk| (chunk, rx))
2372        })
2373        .boxed()
2374    }
2375
2376    fn stream_chat_with_history(
2377        &self,
2378        messages: &[ChatMessage],
2379        model: &str,
2380        temperature: f64,
2381        options: StreamOptions,
2382    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
2383        let credential = match self.credential.as_ref() {
2384            Some(value) => value.clone(),
2385            None => {
2386                let provider_name = self.name.clone();
2387                return stream::once(async move {
2388                    Err(StreamError::Provider(format!(
2389                        "{} API key not set",
2390                        provider_name
2391                    )))
2392                })
2393                .boxed();
2394            }
2395        };
2396
2397        let effective_messages = if self.merge_system_into_user {
2398            Self::flatten_system_messages(messages)
2399        } else {
2400            messages.to_vec()
2401        };
2402        let api_messages: Vec<Message> = effective_messages
2403            .iter()
2404            .map(|m| Message {
2405                role: m.role.clone(),
2406                content: Self::to_message_content(
2407                    &m.role,
2408                    &m.content,
2409                    !self.merge_system_into_user,
2410                ),
2411            })
2412            .collect();
2413
2414        let request = ApiChatRequest {
2415            model: model.to_string(),
2416            messages: api_messages,
2417            temperature,
2418            stream: Some(options.enabled),
2419            stream_options: None,
2420            reasoning_effort: self.reasoning_effort_for_model(model),
2421            tool_stream: None,
2422            tools: None,
2423            tool_choice: None,
2424            max_tokens: self.max_tokens,
2425        };
2426
2427        let url = self.chat_completions_url();
2428        let client = self.http_client();
2429        let auth_header = self.auth_header.clone();
2430
2431        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
2432
2433        tokio::spawn(async move {
2434            let mut req_builder = client.post(&url).json(&request);
2435
2436            req_builder = match &auth_header {
2437                AuthStyle::Bearer => {
2438                    req_builder.header("Authorization", format!("Bearer {}", credential))
2439                }
2440                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2441                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2442            };
2443
2444            req_builder = req_builder.header("Accept", "text/event-stream");
2445
2446            let response = match req_builder.send().await {
2447                Ok(r) => r,
2448                Err(e) => {
2449                    let _ = tx.send(Err(StreamError::Http(e))).await;
2450                    return;
2451                }
2452            };
2453
2454            if !response.status().is_success() {
2455                let status = response.status();
2456                let error = match response.text().await {
2457                    Ok(e) => e,
2458                    Err(_) => format!("HTTP error: {}", status),
2459                };
                let sanitized = super::sanitize_api_error(&error);
                let _ = tx
                    .send(Err(StreamError::Provider(format!("{status}: {sanitized}"))))
                    .await;
2463                return;
2464            }
2465
2466            let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens);
2467            while let Some(chunk) = chunk_stream.next().await {
2468                if tx.send(chunk).await.is_err() {
2469                    break;
2470                }
2471            }
2472        });
2473
2474        stream::unfold(rx, |mut rx| async move {
2475            rx.recv().await.map(|chunk| (chunk, rx))
2476        })
2477        .boxed()
2478    }
2479
2480    async fn warmup(&self) -> anyhow::Result<()> {
2481        if let Some(credential) = self.credential.as_ref() {
2482            // Hit the chat completions URL with a GET to establish the connection pool.
            // The server will likely return 405 Method Not Allowed, which is fine;
            // the goal is the TLS handshake and HTTP/2 negotiation.
2485            let url = self.chat_completions_url();
2486            let _ = self
2487                .apply_auth_header(self.http_client().get(&url), credential)
2488                .send()
2489                .await?;
2490        }
2491        Ok(())
2492    }
2493}
2494
2495#[cfg(test)]
2496mod tests {
2497    use super::*;
2498
2499    fn make_provider(name: &str, url: &str, key: Option<&str>) -> OpenAiCompatibleProvider {
2500        OpenAiCompatibleProvider::new(name, url, key, AuthStyle::Bearer)
2501    }
2502
2503    #[test]
2504    fn creates_with_key() {
2505        let p = make_provider(
2506            "venice",
2507            "https://api.venice.ai",
2508            Some("venice-test-credential"),
2509        );
2510        assert_eq!(p.name, "venice");
2511        assert_eq!(p.base_url, "https://api.venice.ai");
2512        assert_eq!(p.credential.as_deref(), Some("venice-test-credential"));
2513    }
2514
2515    #[test]
2516    fn creates_without_key() {
2517        let p = make_provider("test", "https://example.com", None);
2518        assert!(p.credential.is_none());
2519    }
2520
2521    #[test]
2522    fn strips_trailing_slash() {
2523        let p = make_provider("test", "https://example.com/", None);
2524        assert_eq!(p.base_url, "https://example.com");
2525    }
2526
2527    #[tokio::test]
2528    async fn chat_fails_without_key() {
2529        let p = make_provider("Venice", "https://api.venice.ai", None);
2530        let result = p
2531            .chat_with_system(None, "hello", "llama-3.3-70b", 0.7)
2532            .await;
2533        assert!(result.is_err());
2534        assert!(
2535            result
2536                .unwrap_err()
2537                .to_string()
2538                .contains("Venice API key not set")
2539        );
2540    }
2541
2542    #[test]
2543    fn request_serializes_correctly() {
2544        let req = ApiChatRequest {
2545            model: "llama-3.3-70b".to_string(),
2546            messages: vec![
2547                Message {
2548                    role: "system".to_string(),
2549                    content: MessageContent::Text("You are Construct".to_string()),
2550                },
2551                Message {
2552                    role: "user".to_string(),
2553                    content: MessageContent::Text("hello".to_string()),
2554                },
2555            ],
2556            temperature: 0.4,
2557            stream: Some(false),
2558            stream_options: None,
2559            reasoning_effort: None,
2560            tool_stream: None,
2561            tools: None,
2562            tool_choice: None,
2563            max_tokens: None,
2564        };
2565        let json = serde_json::to_string(&req).unwrap();
2566        assert!(json.contains("llama-3.3-70b"));
2567        assert!(json.contains("system"));
2568        assert!(json.contains("user"));
2569        // tools/tool_choice should be omitted when None
2570        assert!(!json.contains("tools"));
2571        assert!(!json.contains("tool_choice"));
2572    }
2573
2574    #[test]
2575    fn response_deserializes() {
2576        let json = r#"{"choices":[{"message":{"content":"Hello from Venice!"}}]}"#;
2577        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
2578        assert_eq!(
2579            resp.choices[0].message.content,
2580            Some("Hello from Venice!".to_string())
2581        );
2582    }
2583
2584    #[test]
2585    fn response_empty_choices() {
2586        let json = r#"{"choices":[]}"#;
2587        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
2588        assert!(resp.choices.is_empty());
2589    }
2590
2591    #[test]
2592    fn parse_chat_response_body_reports_sanitized_snippet() {
2593        let body = r#"{"choices":"invalid","api_key":"sk-test-secret-value"}"#;
2594        let err = parse_chat_response_body("custom", body).expect_err("payload should fail");
2595        let msg = err.to_string();
2596
2597        assert!(msg.contains("custom API returned an unexpected chat-completions payload"));
2598        assert!(msg.contains("body="));
2599        assert!(msg.contains("[REDACTED]"));
2600        assert!(!msg.contains("sk-test-secret-value"));
2601    }
2602
2603    #[test]
2604    fn parse_responses_response_body_reports_sanitized_snippet() {
2605        let body = r#"{"output_text":123,"api_key":"sk-another-secret"}"#;
2606        let err = parse_responses_response_body("custom", body).expect_err("payload should fail");
2607        let msg = err.to_string();
2608
2609        assert!(msg.contains("custom Responses API returned an unexpected payload"));
2610        assert!(msg.contains("body="));
2611        assert!(msg.contains("[REDACTED]"));
2612        assert!(!msg.contains("sk-another-secret"));
2613    }
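
    // Added sketch: exercises is_native_tool_schema_unsupported with
    // representative (illustrative, not provider-verified) error strings; the
    // heuristic requires a 400/422 status plus one of the known hint substrings.
    #[test]
    fn native_tool_schema_unsupported_needs_client_error_and_hint() {
        assert!(OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
            reqwest::StatusCode::BAD_REQUEST,
            "Unknown parameter: tools"
        ));
        assert!(!OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
            reqwest::StatusCode::INTERNAL_SERVER_ERROR,
            "does not support tools"
        ));
        assert!(!OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
            reqwest::StatusCode::BAD_REQUEST,
            "rate limited"
        ));
    }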
2614
2615    #[test]
2616    fn x_api_key_auth_style() {
2617        let p = OpenAiCompatibleProvider::new(
2618            "moonshot",
2619            "https://api.moonshot.cn",
2620            Some("ms-key"),
2621            AuthStyle::XApiKey,
2622        );
2623        assert!(matches!(p.auth_header, AuthStyle::XApiKey));
2624    }
2625
2626    #[test]
2627    fn custom_auth_style() {
2628        let p = OpenAiCompatibleProvider::new(
2629            "custom",
2630            "https://api.example.com",
2631            Some("key"),
2632            AuthStyle::Custom("X-Custom-Key".into()),
2633        );
2634        assert!(matches!(p.auth_header, AuthStyle::Custom(_)));
2635    }
2636
2637    #[tokio::test]
2638    async fn all_compatible_providers_fail_without_key() {
2639        let providers = vec![
2640            make_provider("Venice", "https://api.venice.ai", None),
2641            make_provider("Moonshot", "https://api.moonshot.cn", None),
2642            make_provider("GLM", "https://open.bigmodel.cn", None),
2643            make_provider("MiniMax", "https://api.minimaxi.com/v1", None),
2644            make_provider("Groq", "https://api.groq.com/openai", None),
2645            make_provider("Mistral", "https://api.mistral.ai", None),
2646            make_provider("xAI", "https://api.x.ai", None),
2647            make_provider("Astrai", "https://as-trai.com/v1", None),
2648        ];
2649
2650        for p in providers {
2651            let result = p.chat_with_system(None, "test", "model", 0.7).await;
2652            assert!(result.is_err(), "{} should fail without key", p.name);
2653            assert!(
2654                result.unwrap_err().to_string().contains("API key not set"),
2655                "{} error should mention key",
2656                p.name
2657            );
2658        }
2659    }
2660
2661    #[test]
2662    fn responses_extracts_top_level_output_text() {
2663        let json = r#"{"output_text":"Hello from top-level","output":[]}"#;
2664        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2665        assert_eq!(
2666            extract_responses_text(response).as_deref(),
2667            Some("Hello from top-level")
2668        );
2669    }
2670
2671    #[test]
2672    fn responses_extracts_nested_output_text() {
2673        let json =
2674            r#"{"output":[{"content":[{"type":"output_text","text":"Hello from nested"}]}]}"#;
2675        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2676        assert_eq!(
2677            extract_responses_text(response).as_deref(),
2678            Some("Hello from nested")
2679        );
2680    }
2681
2682    #[test]
2683    fn responses_extracts_any_text_as_fallback() {
2684        let json = r#"{"output":[{"content":[{"type":"message","text":"Fallback text"}]}]}"#;
2685        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2686        assert_eq!(
2687            extract_responses_text(response).as_deref(),
2688            Some("Fallback text")
2689        );
2690    }
2691
2692    #[test]
2693    fn build_responses_prompt_preserves_multi_turn_history() {
2694        let messages = vec![
2695            ChatMessage::system("policy"),
2696            ChatMessage::user("step 1"),
2697            ChatMessage::assistant("ack 1"),
2698            ChatMessage::tool("{\"result\":\"ok\"}"),
2699            ChatMessage::user("step 2"),
2700        ];
2701
2702        let (instructions, input) = build_responses_prompt(&messages);
2703
2704        assert_eq!(instructions.as_deref(), Some("policy"));
2705        assert_eq!(input.len(), 4);
2706
2707        let serialized: Vec<serde_json::Value> = input
2708            .iter()
2709            .map(|item| serde_json::to_value(item).expect("responses input item serializes"))
2710            .collect();
2711        assert_eq!(
2712            serialized[0],
2713            serde_json::json!({
2714                "role": "user",
2715                "content": "step 1"
2716            })
2717        );
2718        assert_eq!(
2719            serialized[1],
2720            serde_json::json!({
2721                "role": "assistant",
2722                "type": "message",
2723                "content": [{
2724                    "type": "output_text",
2725                    "text": "ack 1"
2726                }]
2727            })
2728        );
2729        assert_eq!(
2730            serialized[2],
2731            serde_json::json!({
2732                "role": "assistant",
2733                "type": "message",
2734                "content": [{
2735                    "type": "output_text",
2736                    "text": "{\"result\":\"ok\"}"
2737                }]
2738            })
2739        );
2740        assert_eq!(
2741            serialized[3],
2742            serde_json::json!({
2743                "role": "user",
2744                "content": "step 2"
2745            })
2746        );
2747    }
2748
2749    #[tokio::test]
2750    async fn chat_via_responses_requires_non_system_message() {
2751        let provider = make_provider("custom", "https://api.example.com", Some("test-key"));
2752        let err = provider
2753            .chat_via_responses("test-key", &[ChatMessage::system("policy")], "gpt-test")
2754            .await
2755            .expect_err("system-only fallback payload should fail");
2756
2757        assert!(
2758            err.to_string()
2759                .contains("requires at least one non-system message")
2760        );
2761    }
2762
2763    #[test]
2764    fn tool_call_function_name_falls_back_to_top_level_name() {
2765        let call: ToolCall = serde_json::from_value(serde_json::json!({
2766            "name": "memory_recall",
2767            "arguments": "{\"query\":\"latest roadmap\"}"
2768        }))
2769        .unwrap();
2770
2771        assert_eq!(call.function_name().as_deref(), Some("memory_recall"));
2772    }
2773
2774    #[test]
2775    fn tool_call_function_arguments_falls_back_to_parameters_object() {
2776        let call: ToolCall = serde_json::from_value(serde_json::json!({
2777            "name": "shell",
2778            "parameters": {"command": "pwd"}
2779        }))
2780        .unwrap();
2781
2782        assert_eq!(
2783            call.function_arguments().as_deref(),
2784            Some("{\"command\":\"pwd\"}")
2785        );
2786    }
2787
2788    #[test]
2789    fn tool_call_function_arguments_prefers_nested_function_field() {
2790        let call: ToolCall = serde_json::from_value(serde_json::json!({
2791            "name": "ignored_name",
2792            "arguments": "{\"query\":\"ignored\"}",
2793            "function": {
2794                "name": "memory_recall",
2795                "arguments": "{\"query\":\"preferred\"}"
2796            }
2797        }))
2798        .unwrap();
2799
2800        assert_eq!(call.function_name().as_deref(), Some("memory_recall"));
2801        assert_eq!(
2802            call.function_arguments().as_deref(),
2803            Some("{\"query\":\"preferred\"}")
2804        );
2805    }
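
    // Added sketch: without tools the prompt-guided fallback should leave the
    // history untouched.
    #[test]
    fn prompt_guided_tool_instructions_noop_without_tools() {
        let messages = vec![ChatMessage::system("policy"), ChatMessage::user("hi")];
        let unchanged =
            OpenAiCompatibleProvider::with_prompt_guided_tool_instructions(&messages, None);
        assert_eq!(unchanged.len(), 2);
        assert_eq!(unchanged[0].role, "system");
        assert_eq!(unchanged[0].content, "policy");
    }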
2806
2807    // ----------------------------------------------------------
2808    // Custom endpoint path tests (Issue #114)
2809    // ----------------------------------------------------------
2810
2811    #[test]
2812    fn chat_completions_url_standard_openai() {
2813        // Standard OpenAI-compatible providers get /chat/completions appended
2814        let p = make_provider("openai", "https://api.openai.com/v1", None);
2815        assert_eq!(
2816            p.chat_completions_url(),
2817            "https://api.openai.com/v1/chat/completions"
2818        );
2819    }
2820
2821    #[test]
2822    fn chat_completions_url_trailing_slash() {
2823        // Trailing slash is stripped, then /chat/completions appended
2824        let p = make_provider("test", "https://api.example.com/v1/", None);
2825        assert_eq!(
2826            p.chat_completions_url(),
2827            "https://api.example.com/v1/chat/completions"
2828        );
2829    }
2830
2831    #[test]
2832    fn chat_completions_url_volcengine_ark() {
        // VolcEngine ARK uses a custom path; the configured URL is used as-is
2834        let p = make_provider(
2835            "volcengine",
2836            "https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions",
2837            None,
2838        );
2839        assert_eq!(
2840            p.chat_completions_url(),
2841            "https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions"
2842        );
2843    }
2844
2845    #[test]
2846    fn chat_completions_url_custom_full_endpoint() {
2847        // Custom provider with full endpoint path
2848        let p = make_provider(
2849            "custom",
2850            "https://my-api.example.com/v2/llm/chat/completions",
2851            None,
2852        );
2853        assert_eq!(
2854            p.chat_completions_url(),
2855            "https://my-api.example.com/v2/llm/chat/completions"
2856        );
2857    }
2858
2859    #[test]
2860    fn chat_completions_url_requires_exact_suffix_match() {
2861        let p = make_provider(
2862            "custom",
2863            "https://my-api.example.com/v2/llm/chat/completions-proxy",
2864            None,
2865        );
2866        assert_eq!(
2867            p.chat_completions_url(),
2868            "https://my-api.example.com/v2/llm/chat/completions-proxy/chat/completions"
2869        );
2870    }
2871
2872    #[test]
2873    fn responses_url_standard() {
2874        // Standard providers get /v1/responses appended
2875        let p = make_provider("test", "https://api.example.com", None);
2876        assert_eq!(p.responses_url(), "https://api.example.com/v1/responses");
2877    }
2878
2879    #[test]
2880    fn responses_url_custom_full_endpoint() {
2881        // Custom provider with full responses endpoint
2882        let p = make_provider(
2883            "custom",
2884            "https://my-api.example.com/api/v2/responses",
2885            None,
2886        );
2887        assert_eq!(
2888            p.responses_url(),
2889            "https://my-api.example.com/api/v2/responses"
2890        );
2891    }
2892
2893    #[test]
2894    fn responses_url_requires_exact_suffix_match() {
2895        let p = make_provider(
2896            "custom",
2897            "https://my-api.example.com/api/v2/responses-proxy",
2898            None,
2899        );
2900        assert_eq!(
2901            p.responses_url(),
2902            "https://my-api.example.com/api/v2/responses-proxy/responses"
2903        );
2904    }
2905
2906    #[test]
2907    fn responses_url_derives_from_chat_endpoint() {
2908        let p = make_provider(
2909            "custom",
2910            "https://my-api.example.com/api/v2/chat/completions",
2911            None,
2912        );
2913        assert_eq!(
2914            p.responses_url(),
2915            "https://my-api.example.com/api/v2/responses"
2916        );
2917    }
2918
2919    #[test]
2920    fn responses_url_base_with_v1_no_duplicate() {
2921        let p = make_provider("test", "https://api.example.com/v1", None);
2922        assert_eq!(p.responses_url(), "https://api.example.com/v1/responses");
2923    }
2924
2925    #[test]
2926    fn responses_url_non_v1_api_path_uses_raw_suffix() {
2927        let p = make_provider("test", "https://api.example.com/api/coding/v3", None);
2928        assert_eq!(
2929            p.responses_url(),
2930            "https://api.example.com/api/coding/v3/responses"
2931        );
2932    }
2933
2934    #[test]
2935    fn chat_completions_url_without_v1() {
2936        // Provider configured without /v1 in base URL
2937        let p = make_provider("test", "https://api.example.com", None);
2938        assert_eq!(
2939            p.chat_completions_url(),
2940            "https://api.example.com/chat/completions"
2941        );
2942    }
2943
2944    #[test]
2945    fn chat_completions_url_base_with_v1() {
2946        // Provider configured with /v1 in base URL
2947        let p = make_provider("test", "https://api.example.com/v1", None);
2948        assert_eq!(
2949            p.chat_completions_url(),
2950            "https://api.example.com/v1/chat/completions"
2951        );
2952    }
2953
2954    // ----------------------------------------------------------
2955    // Provider-specific endpoint tests (Issue #167)
2956    // ----------------------------------------------------------
2957
2958    #[test]
2959    fn chat_completions_url_zai() {
2960        // Z.AI uses /api/paas/v4 base path
2961        let p = make_provider("zai", "https://api.z.ai/api/paas/v4", None);
2962        assert_eq!(
2963            p.chat_completions_url(),
2964            "https://api.z.ai/api/paas/v4/chat/completions"
2965        );
2966    }
2967
2968    #[test]
2969    fn chat_completions_url_minimax() {
2970        // MiniMax OpenAI-compatible endpoint requires /v1 base path.
2971        let p = make_provider("minimax", "https://api.minimaxi.com/v1", None);
2972        assert_eq!(
2973            p.chat_completions_url(),
2974            "https://api.minimaxi.com/v1/chat/completions"
2975        );
2976    }
2977
2978    #[test]
2979    fn chat_completions_url_glm() {
2980        // GLM (BigModel) uses /api/paas/v4 base path
2981        let p = make_provider("glm", "https://open.bigmodel.cn/api/paas/v4", None);
2982        assert_eq!(
2983            p.chat_completions_url(),
2984            "https://open.bigmodel.cn/api/paas/v4/chat/completions"
2985        );
2986    }
2987
2988    #[test]
2989    fn chat_completions_url_opencode() {
2990        // OpenCode Zen uses /zen/v1 base path
2991        let p = make_provider("opencode", "https://opencode.ai/zen/v1", None);
2992        assert_eq!(
2993            p.chat_completions_url(),
2994            "https://opencode.ai/zen/v1/chat/completions"
2995        );
2996    }
2997
2998    #[test]
2999    fn chat_completions_url_opencode_go() {
3000        // OpenCode Go uses /zen/go/v1 base path
3001        let p = make_provider("opencode-go", "https://opencode.ai/zen/go/v1", None);
3002        assert_eq!(
3003            p.chat_completions_url(),
3004            "https://opencode.ai/zen/go/v1/chat/completions"
3005        );
3006    }
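
    // Illustrative sketch: the same suffix handling should hold for any other
    // OpenAI-compatible vendor. The Groq base URL below is only an example
    // input (it is not pinned anywhere in this module); the expected value
    // follows from the generic append-"/chat/completions" behavior above.
    #[test]
    fn chat_completions_url_generic_openai_style_base() {
        let p = make_provider("groq", "https://api.groq.com/openai/v1", None);
        assert_eq!(
            p.chat_completions_url(),
            "https://api.groq.com/openai/v1/chat/completions"
        );
    }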
3007
3008    #[test]
3009    fn parse_native_response_preserves_tool_call_id() {
3010        let message = ResponseMessage {
3011            content: None,
3012            tool_calls: Some(vec![ToolCall {
3013                id: Some("call_123".to_string()),
3014                kind: Some("function".to_string()),
3015                function: Some(Function {
3016                    name: Some("shell".to_string()),
3017                    arguments: Some(r#"{"command":"pwd"}"#.to_string()),
3018                }),
3019                name: None,
3020                arguments: None,
3021                parameters: None,
3022            }]),
3023            reasoning_content: None,
3024        };
3025
3026        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3027        assert_eq!(parsed.tool_calls.len(), 1);
3028        assert_eq!(parsed.tool_calls[0].id, "call_123");
3029        assert_eq!(parsed.tool_calls[0].name, "shell");
3030    }
3031
3032    #[test]
3033    fn convert_messages_for_native_maps_tool_result_payload() {
3034        let input = vec![ChatMessage::tool(
3035            r#"{"tool_call_id":"call_abc","content":"done"}"#,
3036        )];
3037
3038        let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input, true);
3039        assert_eq!(converted.len(), 1);
3040        assert_eq!(converted[0].role, "tool");
3041        assert_eq!(converted[0].tool_call_id.as_deref(), Some("call_abc"));
3042        assert!(matches!(
3043            converted[0].content.as_ref(),
3044            Some(MessageContent::Text(value)) if value == "done"
3045        ));
3046    }
3047
3048    #[test]
3049    fn convert_messages_for_native_keeps_user_image_markers_as_text_when_disabled() {
3050        let input = vec![ChatMessage::user(
3051            "System primer [IMAGE:data:image/png;base64,abcd] user turn",
3052        )];
3053
3054        let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input, false);
3055        assert_eq!(converted.len(), 1);
3056        assert_eq!(converted[0].role, "user");
3057        assert!(matches!(
3058            converted[0].content.as_ref(),
3059            Some(MessageContent::Text(value))
3060                if value == "System primer [IMAGE:data:image/png;base64,abcd] user turn"
3061        ));
3062    }
3063
3064    #[test]
3065    fn flatten_system_messages_merges_into_first_user() {
3066        let input = vec![
3067            ChatMessage::system("core policy"),
3068            ChatMessage::assistant("ack"),
3069            ChatMessage::system("delivery rules"),
3070            ChatMessage::user("hello"),
3071            ChatMessage::assistant("post-user"),
3072        ];
3073
3074        let output = OpenAiCompatibleProvider::flatten_system_messages(&input);
3075        assert_eq!(output.len(), 3);
3076        assert_eq!(output[0].role, "assistant");
3077        assert_eq!(output[0].content, "ack");
3078        assert_eq!(output[1].role, "user");
3079        assert_eq!(output[1].content, "core policy\n\ndelivery rules\n\nhello");
3080        assert_eq!(output[2].role, "assistant");
3081        assert_eq!(output[2].content, "post-user");
3082        assert!(output.iter().all(|m| m.role != "system"));
3083    }
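
    // Sketch (assumption): when no system messages are present, flattening
    // should be a no-op that preserves the original order and roles.
    #[test]
    fn flatten_system_messages_no_op_without_system_roles() {
        let input = vec![ChatMessage::user("hi"), ChatMessage::assistant("hello")];
        let output = OpenAiCompatibleProvider::flatten_system_messages(&input);
        assert_eq!(output.len(), 2);
        assert_eq!(output[0].role, "user");
        assert_eq!(output[0].content, "hi");
        assert_eq!(output[1].role, "assistant");
        assert_eq!(output[1].content, "hello");
    }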
3084
3085    #[test]
3086    fn flatten_system_messages_inserts_user_when_missing() {
3087        let input = vec![
3088            ChatMessage::system("core policy"),
3089            ChatMessage::assistant("ack"),
3090        ];
3091
3092        let output = OpenAiCompatibleProvider::flatten_system_messages(&input);
3093        assert_eq!(output.len(), 2);
3094        assert_eq!(output[0].role, "user");
3095        assert_eq!(output[0].content, "core policy");
3096        assert_eq!(output[1].role, "assistant");
3097        assert_eq!(output[1].content, "ack");
3098    }
3099
3100    #[test]
3101    fn strip_think_tags_drops_unclosed_block_suffix() {
3102        let input = "visible<think>hidden";
3103        assert_eq!(strip_think_tags(input), "visible");
3104    }
3105
3106    #[test]
3107    fn native_tool_schema_unsupported_detection_is_precise() {
3108        assert!(OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3109            reqwest::StatusCode::BAD_REQUEST,
3110            "unknown parameter: tools"
3111        ));
3112        assert!(
3113            !OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3114                reqwest::StatusCode::UNAUTHORIZED,
3115                "unknown parameter: tools"
3116            )
3117        );
3118    }
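
    // Sketch under the same precision assumption: a 400 whose body does not
    // mention tool support at all should not be treated as "native tools
    // unsupported". The error text here is hypothetical, not from a provider.
    #[test]
    fn native_tool_schema_unsupported_ignores_unrelated_bad_request() {
        assert!(
            !OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
                reqwest::StatusCode::BAD_REQUEST,
                "invalid request: missing model"
            )
        );
    }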
3119
3120    #[test]
3121    fn native_tool_schema_unsupported_detects_groq_tool_validation_error() {
3122        assert!(OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3123            reqwest::StatusCode::BAD_REQUEST,
3124            r#"Groq API error (400 Bad Request): {"error":{"message":"tool call validation failed: attempted to call tool 'memory_recall={\"limit\":5}' which was not in request"}}"#
3125        ));
3126    }
3127
3128    #[test]
3129    fn prompt_guided_tool_fallback_injects_system_instruction() {
3130        let input = vec![ChatMessage::user("check status")];
3131        let tools = vec![crate::tools::ToolSpec {
3132            name: "shell_exec".to_string(),
3133            description: "Execute shell command".to_string(),
3134            parameters: serde_json::json!({
3135                "type": "object",
3136                "properties": {
3137                    "command": { "type": "string" }
3138                },
3139                "required": ["command"]
3140            }),
3141        }];
3142
3143        let output =
3144            OpenAiCompatibleProvider::with_prompt_guided_tool_instructions(&input, Some(&tools));
3145        assert!(!output.is_empty());
3146        assert_eq!(output[0].role, "system");
3147        assert!(output[0].content.contains("Available Tools"));
3148        assert!(output[0].content.contains("shell_exec"));
3149    }
3150
3151    #[test]
3152    fn reasoning_effort_only_applies_to_gpt5_and_codex_models() {
3153        let provider = make_provider("test", "https://example.com", None)
3154            .with_reasoning_effort(Some("high".to_string()));
3155
3156        assert_eq!(
3157            provider.reasoning_effort_for_model("gpt-5.3-codex"),
3158            Some("high".to_string())
3159        );
3160        assert_eq!(
3161            provider.reasoning_effort_for_model("openai/gpt-5"),
3162            Some("high".to_string())
3163        );
3164        assert_eq!(provider.reasoning_effort_for_model("llama-3.3-70b"), None);
3165    }
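
    // Minimal sketch (assumption): with no effort configured via
    // with_reasoning_effort, reasoning_effort_for_model is expected to return
    // None for every model, since it only forwards the configured value.
    #[test]
    fn reasoning_effort_absent_when_not_configured() {
        let provider = make_provider("test", "https://example.com", None);
        assert_eq!(provider.reasoning_effort_for_model("gpt-5.3-codex"), None);
        assert_eq!(provider.reasoning_effort_for_model("llama-3.3-70b"), None);
    }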
3166
3167    #[tokio::test]
3168    async fn warmup_without_key_is_noop() {
3169        let provider = make_provider("test", "https://example.com", None);
3170        let result = provider.warmup().await;
3171        assert!(result.is_ok());
3172    }
3173
3174    // ══════════════════════════════════════════════════════════
3175    // Native tool calling tests
3176    // ══════════════════════════════════════════════════════════
3177
3178    #[test]
3179    fn capabilities_reports_native_tool_calling() {
3180        let p = make_provider("test", "https://example.com", None);
3181        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3182        assert!(caps.native_tool_calling);
3183        assert!(!caps.vision);
3184    }
3185
3186    #[test]
3187    fn capabilities_reports_vision_for_qwen_compatible_provider() {
3188        let p = OpenAiCompatibleProvider::new_with_vision(
3189            "Qwen",
3190            "https://dashscope.aliyuncs.com/compatible-mode/v1",
3191            Some("k"),
3192            AuthStyle::Bearer,
3193            true,
3194        );
3195        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3196        assert!(caps.native_tool_calling);
3197        assert!(caps.vision);
3198    }
3199
3200    #[test]
3201    fn minimax_provider_disables_native_tool_calling() {
3202        let p = OpenAiCompatibleProvider::new_merge_system_into_user(
3203            "MiniMax",
3204            "https://api.minimax.chat/v1",
3205            Some("k"),
3206            AuthStyle::Bearer,
3207        );
3208        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3209        assert!(
3210            !caps.native_tool_calling,
3211            "MiniMax should use prompt-guided tool calling, not native"
3212        );
3213        assert!(!caps.vision);
3214    }
3215
3216    #[test]
3217    fn user_agent_constructor_keeps_native_tool_calling_enabled() {
3218        let p = OpenAiCompatibleProvider::new_with_user_agent(
3219            "TestProvider",
3220            "https://example.com",
3221            Some("k"),
3222            AuthStyle::Bearer,
3223            "construct-test/1.0",
3224        );
3225        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3226        assert!(caps.native_tool_calling);
3227        assert!(!caps.vision);
3228        assert_eq!(p.user_agent.as_deref(), Some("construct-test/1.0"));
3229    }
3230
3231    #[test]
3232    fn user_agent_and_vision_constructor_preserves_capability_flags() {
3233        let p = OpenAiCompatibleProvider::new_with_user_agent_and_vision(
3234            "VisionProvider",
3235            "https://example.com",
3236            Some("k"),
3237            AuthStyle::Bearer,
3238            "construct-test/vision",
3239            true,
3240        );
3241        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3242        assert!(caps.native_tool_calling);
3243        assert!(caps.vision);
3244        assert_eq!(p.user_agent.as_deref(), Some("construct-test/vision"));
3245    }
3246
3247    #[test]
3248    fn no_responses_fallback_constructor_keeps_native_tool_calling_enabled() {
3249        let p = OpenAiCompatibleProvider::new_no_responses_fallback(
3250            "FallbackProvider",
3251            "https://example.com",
3252            Some("k"),
3253            AuthStyle::Bearer,
3254        );
3255        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3256        assert!(caps.native_tool_calling);
3257        assert!(!caps.vision);
3258        assert!(p.user_agent.is_none());
3259    }
3260
3261    #[test]
3262    fn to_message_content_converts_image_markers_to_openai_parts() {
3263        let content = "Describe this\n\n[IMAGE:data:image/png;base64,abcd]";
3264        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3265            "user", content, true,
3266        ))
3267        .unwrap();
3268        let parts = value
3269            .as_array()
3270            .expect("multimodal content should be an array");
3271        assert_eq!(parts.len(), 2);
3272        assert_eq!(parts[0]["type"], "text");
3273        assert_eq!(parts[0]["text"], "Describe this");
3274        assert_eq!(parts[1]["type"], "image_url");
3275        assert_eq!(parts[1]["image_url"]["url"], "data:image/png;base64,abcd");
3276    }
3277
3278    #[test]
3279    fn to_message_content_keeps_markers_as_text_when_user_image_parts_disabled() {
3280        let content = "Policy [IMAGE:data:image/png;base64,abcd]";
3281        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3282            "user", content, false,
3283        ))
3284        .unwrap();
3285        assert_eq!(value, serde_json::json!(content));
3286    }
3287
3288    #[test]
3289    fn to_message_content_keeps_plain_text_for_non_user_roles() {
3290        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3291            "system",
3292            "You are a helpful assistant.",
3293            true,
3294        ))
3295        .unwrap();
3296        assert_eq!(value, serde_json::json!("You are a helpful assistant."));
3297    }
3298
3299    #[test]
3300    fn tool_specs_convert_to_openai_format() {
3301        let specs = vec![crate::tools::ToolSpec {
3302            name: "shell".to_string(),
3303            description: "Run shell command".to_string(),
3304            parameters: serde_json::json!({
3305                "type": "object",
3306                "properties": {"command": {"type": "string"}},
3307                "required": ["command"]
3308            }),
3309        }];
3310
3311        let tools = OpenAiCompatibleProvider::tool_specs_to_openai_format(&specs);
3312        assert_eq!(tools.len(), 1);
3313        assert_eq!(tools[0]["type"], "function");
3314        assert_eq!(tools[0]["function"]["name"], "shell");
3315        assert_eq!(tools[0]["function"]["description"], "Run shell command");
3316        assert_eq!(tools[0]["function"]["parameters"]["required"][0], "command");
3317    }
3318
3319    #[test]
3320    fn request_serializes_with_tools() {
3321        let tools = vec![serde_json::json!({
3322            "type": "function",
3323            "function": {
3324                "name": "get_weather",
3325                "description": "Get weather for a location",
3326                "parameters": {
3327                    "type": "object",
3328                    "properties": {
3329                        "location": {"type": "string"}
3330                    }
3331                }
3332            }
3333        })];
3334
3335        let req = ApiChatRequest {
3336            model: "test-model".to_string(),
3337            messages: vec![Message {
3338                role: "user".to_string(),
3339                content: MessageContent::Text("What is the weather?".to_string()),
3340            }],
3341            temperature: 0.7,
3342            stream: Some(false),
3343            stream_options: None,
3344            reasoning_effort: None,
3345            tool_stream: None,
3346            tools: Some(tools),
3347            tool_choice: Some("auto".to_string()),
3348            max_tokens: None,
3349        };
3350        let json = serde_json::to_string(&req).unwrap();
3351        assert!(json.contains("\"tools\""));
3352        assert!(json.contains("get_weather"));
3353        assert!(json.contains("\"tool_choice\":\"auto\""));
3354    }
3355
3356    #[test]
3357    fn zai_tool_requests_enable_tool_stream() {
3358        let provider = make_provider("zai", "https://api.z.ai/api/paas/v4", None);
3359        let req = ApiChatRequest {
3360            model: "glm-5".to_string(),
3361            messages: vec![Message {
3362                role: "user".to_string(),
3363                content: MessageContent::Text("List /tmp".to_string()),
3364            }],
3365            temperature: 0.7,
3366            stream: Some(false),
3367            stream_options: None,
3368            reasoning_effort: None,
3369            tool_stream: provider.tool_stream_for_tools(true),
3370            tools: Some(vec![serde_json::json!({
3371                "type": "function",
3372                "function": {
3373                    "name": "shell",
3374                    "description": "Run a shell command",
3375                    "parameters": {
3376                        "type": "object",
3377                        "properties": {
3378                            "command": {"type": "string"}
3379                        }
3380                    }
3381                }
3382            })]),
3383            tool_choice: Some("auto".to_string()),
3384            max_tokens: None,
3385        };
3386
3387        let json = serde_json::to_string(&req).unwrap();
3388        assert!(json.contains("\"tool_stream\":true"));
3389    }
3390
3391    #[test]
3392    fn non_zai_tool_requests_omit_tool_stream() {
3393        let provider = make_provider("test", "https://api.example.com/v1", None);
3394        let req = ApiChatRequest {
3395            model: "test-model".to_string(),
3396            messages: vec![Message {
3397                role: "user".to_string(),
3398                content: MessageContent::Text("List /tmp".to_string()),
3399            }],
3400            temperature: 0.7,
3401            stream: Some(false),
3402            stream_options: None,
3403            reasoning_effort: None,
3404            tool_stream: provider.tool_stream_for_tools(true),
3405            tools: Some(vec![serde_json::json!({
3406                "type": "function",
3407                "function": {
3408                    "name": "shell",
3409                    "description": "Run a shell command",
3410                    "parameters": {
3411                        "type": "object",
3412                        "properties": {
3413                            "command": {"type": "string"}
3414                        }
3415                    }
3416                }
3417            })]),
3418            tool_choice: Some("auto".to_string()),
3419            max_tokens: None,
3420        };
3421
3422        let json = serde_json::to_string(&req).unwrap();
3423        assert!(!json.contains("\"tool_stream\""));
3424    }
3425
3426    #[test]
3427    fn z_ai_host_enables_tool_stream_for_custom_profiles() {
3428        let provider = make_provider("custom", "https://api.z.ai/api/coding/paas/v4", None);
3429        assert_eq!(provider.tool_stream_for_tools(true), Some(true));
3430    }
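
    // Direct check of the helper, implied by the serialization test above:
    // hosts outside the Z.AI family should yield no tool_stream flag at all.
    #[test]
    fn non_zai_host_tool_stream_for_tools_is_none() {
        let provider = make_provider("test", "https://api.example.com/v1", None);
        assert_eq!(provider.tool_stream_for_tools(true), None);
    }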
3431
3432    #[test]
3433    fn response_with_tool_calls_deserializes() {
3434        let json = r#"{
3435            "choices": [{
3436                "message": {
3437                    "content": null,
3438                    "tool_calls": [{
3439                        "type": "function",
3440                        "function": {
3441                            "name": "get_weather",
3442                            "arguments": "{\"location\":\"London\"}"
3443                        }
3444                    }]
3445                }
3446            }]
3447        }"#;
3448
3449        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3450        let msg = &resp.choices[0].message;
3451        assert!(msg.content.is_none());
3452        let tool_calls = msg.tool_calls.as_ref().unwrap();
3453        assert_eq!(tool_calls.len(), 1);
3454        assert_eq!(
3455            tool_calls[0].function.as_ref().unwrap().name.as_deref(),
3456            Some("get_weather")
3457        );
3458        assert_eq!(
3459            tool_calls[0]
3460                .function
3461                .as_ref()
3462                .unwrap()
3463                .arguments
3464                .as_deref(),
3465            Some("{\"location\":\"London\"}")
3466        );
3467    }
3468
3469    #[test]
3470    fn response_with_multiple_tool_calls() {
3471        let json = r#"{
3472            "choices": [{
3473                "message": {
3474                    "content": "I'll check both.",
3475                    "tool_calls": [
3476                        {
3477                            "type": "function",
3478                            "function": {
3479                                "name": "get_weather",
3480                                "arguments": "{\"location\":\"London\"}"
3481                            }
3482                        },
3483                        {
3484                            "type": "function",
3485                            "function": {
3486                                "name": "get_time",
3487                                "arguments": "{\"timezone\":\"UTC\"}"
3488                            }
3489                        }
3490                    ]
3491                }
3492            }]
3493        }"#;
3494
3495        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3496        let msg = &resp.choices[0].message;
3497        assert_eq!(msg.content.as_deref(), Some("I'll check both."));
3498        let tool_calls = msg.tool_calls.as_ref().unwrap();
3499        assert_eq!(tool_calls.len(), 2);
3500        assert_eq!(
3501            tool_calls[0].function.as_ref().unwrap().name.as_deref(),
3502            Some("get_weather")
3503        );
3504        assert_eq!(
3505            tool_calls[1].function.as_ref().unwrap().name.as_deref(),
3506            Some("get_time")
3507        );
3508    }
3509
3510    #[tokio::test]
3511    async fn chat_with_tools_fails_without_key() {
3512        let p = make_provider("TestProvider", "https://example.com", None);
3513        let messages = vec![ChatMessage {
3514            role: "user".to_string(),
3515            content: "hello".to_string(),
3516        }];
3517        let tools = vec![serde_json::json!({
3518            "type": "function",
3519            "function": {
3520                "name": "test_tool",
3521                "description": "A test tool",
3522                "parameters": {}
3523            }
3524        })];
3525
3526        let result = p.chat_with_tools(&messages, &tools, "model", 0.7).await;
3527        assert!(result.is_err());
3528        assert!(
3529            result
3530                .unwrap_err()
3531                .to_string()
3532                .contains("TestProvider API key not set")
3533        );
3534    }
3535
3536    #[test]
3537    fn response_with_no_tool_calls_has_empty_vec() {
3538        let json = r#"{"choices":[{"message":{"content":"Just text, no tools."}}]}"#;
3539        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3540        let msg = &resp.choices[0].message;
3541        assert_eq!(msg.content.as_deref(), Some("Just text, no tools."));
3542        assert!(msg.tool_calls.is_none());
3543    }
3544
3545    #[test]
3546    fn flatten_system_messages_merges_into_first_user_and_removes_system_roles() {
3547        let messages = vec![
3548            ChatMessage::system("System A"),
3549            ChatMessage::assistant("Earlier assistant turn"),
3550            ChatMessage::system("System B"),
3551            ChatMessage::user("User turn"),
3552            ChatMessage::tool(r#"{"ok":true}"#),
3553        ];
3554
3555        let flattened = OpenAiCompatibleProvider::flatten_system_messages(&messages);
3556        assert_eq!(flattened.len(), 3);
3557        assert_eq!(flattened[0].role, "assistant");
3558        assert_eq!(
3559            flattened[1].content,
3560            "System A\n\nSystem B\n\nUser turn".to_string()
3561        );
3562        assert_eq!(flattened[1].role, "user");
3563        assert_eq!(flattened[2].role, "tool");
3564        assert!(!flattened.iter().any(|m| m.role == "system"));
3565    }
3566
3567    #[test]
3568    fn flatten_system_messages_inserts_synthetic_user_when_no_user_exists() {
3569        let messages = vec![
3570            ChatMessage::assistant("Assistant only"),
3571            ChatMessage::system("Synthetic system"),
3572        ];
3573
3574        let flattened = OpenAiCompatibleProvider::flatten_system_messages(&messages);
3575        assert_eq!(flattened.len(), 2);
3576        assert_eq!(flattened[0].role, "user");
3577        assert_eq!(flattened[0].content, "Synthetic system");
3578        assert_eq!(flattened[1].role, "assistant");
3579    }
3580
3581    #[test]
3582    fn strip_think_tags_removes_multiple_blocks_with_surrounding_text() {
3583        let input = "Answer A <think>hidden 1</think> and B <think>hidden 2</think> done";
3584        let output = strip_think_tags(input);
3585        assert_eq!(output, "Answer A  and B  done");
3586    }
3587
3588    #[test]
3589    fn strip_think_tags_drops_tail_for_unclosed_block() {
3590        let input = "Visible<think>hidden tail";
3591        let output = strip_think_tags(input);
3592        assert_eq!(output, "Visible");
3593    }
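
    // Sketch (assumption): text containing no <think> markup should pass
    // through strip_think_tags unchanged; only tagged spans are removed.
    #[test]
    fn strip_think_tags_passes_plain_text_through() {
        let input = "No hidden reasoning here";
        assert_eq!(strip_think_tags(input), "No hidden reasoning here");
    }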
3594
3595    // ----------------------------------------------------------
3596    // Reasoning model fallback tests (reasoning_content)
3597    // ----------------------------------------------------------
3598
3599    #[test]
3600    fn reasoning_content_fallback_when_content_empty() {
3601        // Reasoning models (Qwen3, GLM-4) return content: "" with reasoning_content populated
3602        let json = r#"{"choices":[{"message":{"content":"","reasoning_content":"Thinking output here"}}]}"#;
3603        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3604        let msg = &resp.choices[0].message;
3605        assert_eq!(msg.effective_content(), "Thinking output here");
3606    }
3607
3608    #[test]
3609    fn reasoning_content_fallback_when_content_null() {
3610        // Some models may return content: null with reasoning_content
3611        let json =
3612            r#"{"choices":[{"message":{"content":null,"reasoning_content":"Fallback text"}}]}"#;
3613        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3614        let msg = &resp.choices[0].message;
3615        assert_eq!(msg.effective_content(), "Fallback text");
3616    }
3617
3618    #[test]
3619    fn reasoning_content_fallback_when_content_missing() {
3620        // content field absent entirely, reasoning_content present
3621        let json = r#"{"choices":[{"message":{"reasoning_content":"Only reasoning"}}]}"#;
3622        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3623        let msg = &resp.choices[0].message;
3624        assert_eq!(msg.effective_content(), "Only reasoning");
3625    }
3626
3627    #[test]
3628    fn reasoning_content_not_used_when_content_present() {
3629        // Normal model: content populated, reasoning_content should be ignored
3630        let json = r#"{"choices":[{"message":{"content":"Normal response","reasoning_content":"Should be ignored"}}]}"#;
3631        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3632        let msg = &resp.choices[0].message;
3633        assert_eq!(msg.effective_content(), "Normal response");
3634    }
3635
3636    #[test]
3637    fn reasoning_content_used_when_content_only_think_tags() {
3638        let json = r#"{"choices":[{"message":{"content":"<think>secret</think>","reasoning_content":"Fallback text"}}]}"#;
3639        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3640        let msg = &resp.choices[0].message;
3641        assert_eq!(msg.effective_content(), "Fallback text");
3642        assert_eq!(
3643            msg.effective_content_optional().as_deref(),
3644            Some("Fallback text")
3645        );
3646    }
3647
3648    #[test]
3649    fn reasoning_content_both_absent_returns_empty() {
3650        // Neither content nor reasoning_content is present: effective_content() returns an empty string
3651        let json = r#"{"choices":[{"message":{}}]}"#;
3652        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3653        let msg = &resp.choices[0].message;
3654        assert_eq!(msg.effective_content(), "");
3655    }
3656
3657    #[test]
3658    fn reasoning_content_ignored_by_normal_models() {
3659        // Standard response without reasoning_content still works
3660        let json = r#"{"choices":[{"message":{"content":"Hello from Venice!"}}]}"#;
3661        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3662        let msg = &resp.choices[0].message;
3663        assert!(msg.reasoning_content.is_none());
3664        assert_eq!(msg.effective_content(), "Hello from Venice!");
3665    }
3666
3667    // ----------------------------------------------------------
3668    // SSE streaming reasoning_content fallback tests
3669    // ----------------------------------------------------------
3670
3671    #[test]
3672    fn parse_sse_line_with_content() {
3673        let line = r#"data: {"choices":[{"delta":{"content":"hello"}}]}"#;
3674        let result = parse_sse_line(line).unwrap().unwrap();
3675        assert_eq!(result.delta, "hello");
3676        assert!(result.reasoning.is_none());
3677    }
3678
3679    #[test]
3680    fn parse_sse_line_with_reasoning_content() {
3681        let line = r#"data: {"choices":[{"delta":{"reasoning_content":"thinking..."}}]}"#;
3682        let result = parse_sse_line(line).unwrap().unwrap();
3683        assert!(result.delta.is_empty());
3684        assert_eq!(result.reasoning.as_deref(), Some("thinking..."));
3685    }
3686
3687    #[test]
3688    fn parse_sse_line_with_both_prefers_content() {
3689        let line = r#"data: {"choices":[{"delta":{"content":"real answer","reasoning_content":"thinking..."}}]}"#;
3690        let result = parse_sse_line(line).unwrap().unwrap();
3691        assert_eq!(result.delta, "real answer");
3692        assert!(result.reasoning.is_none());
3693    }
3694
3695    #[test]
3696    fn parse_sse_line_with_empty_content_falls_back_to_reasoning() {
3697        let line =
3698            r#"data: {"choices":[{"delta":{"content":"","reasoning_content":"thinking..."}}]}"#;
3699        let result = parse_sse_line(line).unwrap().unwrap();
3700        assert!(result.delta.is_empty());
3701        assert_eq!(result.reasoning.as_deref(), Some("thinking..."));
3702    }
3703
3704    #[test]
3705    fn parse_sse_line_done_sentinel() {
3706        let line = "data: [DONE]";
3707        let result = parse_sse_line(line).unwrap();
3708        assert!(result.is_none());
3709    }
3710
3711    #[test]
3712    fn parse_sse_chunk_with_tool_call_delta() {
3713        let line = r#"data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","function":{"name":"shell","arguments":"{\"command\":\"date\"}"}}]}}]}"#;
3714        let chunk = parse_sse_chunk(line)
3715            .unwrap()
3716            .expect("chunk should be parsed");
3717        let choice = chunk.choices.first().expect("choice should exist");
3718        let tool_calls = choice
3719            .delta
3720            .tool_calls
3721            .as_ref()
3722            .expect("tool call deltas should exist");
3723        assert_eq!(tool_calls.len(), 1);
3724        assert_eq!(tool_calls[0].index, Some(0));
3725        assert_eq!(tool_calls[0].id.as_deref(), Some("call_1"));
3726        assert_eq!(
3727            tool_calls[0]
3728                .function
3729                .as_ref()
3730                .and_then(|function| function.name.as_deref()),
3731            Some("shell")
3732        );
3733    }
3734
3735    #[test]
3736    fn stream_tool_call_accumulator_combines_deltas() {
3737        let mut acc = StreamToolCallAccumulator::default();
3738        acc.apply_delta(&StreamToolCallDelta {
3739            index: Some(0),
3740            id: Some("call_1".to_string()),
3741            function: Some(StreamFunctionDelta {
3742                name: Some("shell".to_string()),
3743                arguments: Some("{\"command\":\"".to_string()),
3744            }),
3745            name: None,
3746            arguments: None,
3747        });
3748        acc.apply_delta(&StreamToolCallDelta {
3749            index: Some(0),
3750            id: None,
3751            function: Some(StreamFunctionDelta {
3752                name: None,
3753                arguments: Some("date\"}".to_string()),
3754            }),
3755            name: None,
3756            arguments: None,
3757        });
3758
3759        let tool_call = acc
3760            .into_provider_tool_call()
3761            .expect("accumulator should emit tool call");
3762        assert_eq!(tool_call.id, "call_1");
3763        assert_eq!(tool_call.name, "shell");
3764        assert_eq!(tool_call.arguments, r#"{"command":"date"}"#);
3765    }
3766
3767    #[test]
3768    fn api_response_parses_usage() {
3769        let json = r#"{
3770            "choices": [{"message": {"content": "Hello"}}],
3771            "usage": {"prompt_tokens": 150, "completion_tokens": 60}
3772        }"#;
3773        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3774        let usage = resp.usage.unwrap();
3775        assert_eq!(usage.prompt_tokens, Some(150));
3776        assert_eq!(usage.completion_tokens, Some(60));
3777    }
3778
3779    #[test]
3780    fn api_response_parses_without_usage() {
3781        let json = r#"{"choices": [{"message": {"content": "Hello"}}]}"#;
3782        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3783        assert!(resp.usage.is_none());
3784    }
3785
3786    // ═══════════════════════════════════════════════════════════════════════
3787    // reasoning_content pass-through tests
3788    // ═══════════════════════════════════════════════════════════════════════
3789
3790    #[test]
3791    fn parse_native_response_captures_reasoning_content() {
3792        let message = ResponseMessage {
3793            content: Some("answer".to_string()),
3794            reasoning_content: Some("thinking step".to_string()),
3795            tool_calls: Some(vec![ToolCall {
3796                id: Some("call_1".to_string()),
3797                kind: Some("function".to_string()),
3798                function: Some(Function {
3799                    name: Some("shell".to_string()),
3800                    arguments: Some(r#"{"cmd":"ls"}"#.to_string()),
3801                }),
3802                name: None,
3803                arguments: None,
3804                parameters: None,
3805            }]),
3806        };
3807
3808        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3809        assert_eq!(parsed.reasoning_content.as_deref(), Some("thinking step"));
3810        assert_eq!(parsed.text.as_deref(), Some("answer"));
3811        assert_eq!(parsed.tool_calls.len(), 1);
3812    }
3813
3814    #[test]
3815    fn parse_native_response_none_reasoning_content_for_normal_model() {
3816        let message = ResponseMessage {
3817            content: Some("hello".to_string()),
3818            reasoning_content: None,
3819            tool_calls: None,
3820        };
3821
3822        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3823        assert!(parsed.reasoning_content.is_none());
3824        assert_eq!(parsed.text.as_deref(), Some("hello"));
3825    }
3826
3827    #[test]
3828    fn convert_messages_for_native_round_trips_reasoning_content() {
3829        // Simulate stored assistant history JSON that includes reasoning_content
3830        let history_json = serde_json::json!({
3831            "content": "I will check",
3832            "tool_calls": [{
3833                "id": "tc_1",
3834                "name": "shell",
3835                "arguments": "{\"cmd\":\"ls\"}"
3836            }],
3837            "reasoning_content": "Let me think about this..."
3838        });
3839
3840        let messages = vec![ChatMessage::assistant(history_json.to_string())];
3841        let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages, true);
3842        assert_eq!(native.len(), 1);
3843        assert_eq!(native[0].role, "assistant");
3844        assert_eq!(
3845            native[0].reasoning_content.as_deref(),
3846            Some("Let me think about this...")
3847        );
3848        assert!(native[0].tool_calls.is_some());
3849    }
3850
3851    #[test]
3852    fn convert_messages_for_native_no_reasoning_content_when_absent() {
3853        // Normal model history without reasoning_content key
3854        let history_json = serde_json::json!({
3855            "content": "I will check",
3856            "tool_calls": [{
3857                "id": "tc_1",
3858                "name": "shell",
3859                "arguments": "{\"cmd\":\"ls\"}"
3860            }]
3861        });
3862
3863        let messages = vec![ChatMessage::assistant(history_json.to_string())];
3864        let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages, true);
3865        assert_eq!(native.len(), 1);
3866        assert!(native[0].reasoning_content.is_none());
3867    }
3868
3869    #[test]
3870    fn convert_messages_for_native_reasoning_content_serialized_only_when_present() {
3871        // Verify skip_serializing_if works: reasoning_content omitted from JSON when None
3872        let msg_without = NativeMessage {
3873            role: "assistant".to_string(),
3874            content: Some(MessageContent::Text("hi".to_string())),
3875            tool_call_id: None,
3876            tool_calls: None,
3877            reasoning_content: None,
3878        };
3879        let json = serde_json::to_string(&msg_without).unwrap();
3880        assert!(
3881            !json.contains("reasoning_content"),
3882            "reasoning_content should be omitted when None"
3883        );
3884
3885        let msg_with = NativeMessage {
3886            role: "assistant".to_string(),
3887            content: Some(MessageContent::Text("hi".to_string())),
3888            tool_call_id: None,
3889            tool_calls: None,
3890            reasoning_content: Some("thinking...".to_string()),
3891        };
3892        let json = serde_json::to_string(&msg_with).unwrap();
3893        assert!(
3894            json.contains("reasoning_content"),
3895            "reasoning_content should be present when Some"
3896        );
3897        assert!(json.contains("thinking..."));
3898    }
3899
3900    #[test]
3901    fn default_timeout_is_120s() {
3902        let p = make_provider("test", "https://example.com", None);
3903        assert_eq!(p.timeout_secs, 120);
3904    }
3905
3906    #[test]
3907    fn with_timeout_secs_overrides_default() {
3908        let p = make_provider("test", "https://example.com", None).with_timeout_secs(300);
3909        assert_eq!(p.timeout_secs, 300);
3910    }
3911
3912    #[test]
3913    fn extra_headers_default_empty() {
3914        let p = make_provider("test", "https://example.com", None);
3915        assert!(p.extra_headers.is_empty());
3916    }
3917
3918    #[test]
3919    fn with_extra_headers_sets_headers() {
3920        let mut headers = std::collections::HashMap::new();
3921        headers.insert("X-Title".to_string(), "construct".to_string());
3922        headers.insert(
3923            "HTTP-Referer".to_string(),
3924            "https://example.com".to_string(),
3925        );
3926        let p = make_provider("test", "https://example.com", None).with_extra_headers(headers);
3927        assert_eq!(p.extra_headers.len(), 2);
3928        assert_eq!(p.extra_headers.get("X-Title").unwrap(), "construct");
3929        assert_eq!(
3930            p.extra_headers.get("HTTP-Referer").unwrap(),
3931            "https://example.com"
3932        );
3933    }
3934
3935    #[test]
3936    fn http_client_with_extra_headers_builds_successfully() {
3937        let mut headers = std::collections::HashMap::new();
3938        headers.insert("X-Title".to_string(), "construct".to_string());
3939        headers.insert("User-Agent".to_string(), "TestAgent/1.0".to_string());
3940        let p = make_provider("test", "https://example.com", None).with_extra_headers(headers);
3941        // Should not panic
3942        let _client = p.http_client();
3943    }
3944
3945    #[test]
3946    fn http_client_without_extra_headers_or_user_agent() {
3947        let p = make_provider("test", "https://example.com", None);
3948        // Should use the cached proxy client path
3949        let _client = p.http_client();
3950    }
3951
3952    #[test]
3953    fn extra_headers_combined_with_user_agent() {
3954        let mut headers = std::collections::HashMap::new();
3955        headers.insert("X-Title".to_string(), "construct".to_string());
3956        let p = OpenAiCompatibleProvider::new_with_user_agent(
3957            "test",
3958            "https://example.com",
3959            None,
3960            AuthStyle::Bearer,
3961            "CustomAgent/1.0",
3962        )
3963        .with_extra_headers(headers);
3964        assert_eq!(p.user_agent.as_deref(), Some("CustomAgent/1.0"));
3965        assert_eq!(p.extra_headers.len(), 1);
3966        // Should not panic
3967        let _client = p.http_client();
3968    }
3969
3970    #[test]
3971    fn tool_call_none_fields_omitted_from_json() {
3972        // Ensures providers like Mistral that reject extra fields (e.g. "name": null)
3973        // don't receive them when the ToolCall compat fields are None.
3974        let tc = ToolCall {
3975            id: Some("call_1".to_string()),
3976            kind: Some("function".to_string()),
3977            function: Some(Function {
3978                name: Some("shell".to_string()),
3979                arguments: Some("{\"command\":\"ls\"}".to_string()),
3980            }),
3981            name: None,
3982            arguments: None,
3983            parameters: None,
3984        };
3985        let json = serde_json::to_value(&tc).unwrap();
3986        assert!(!json.as_object().unwrap().contains_key("name"));
3987        assert!(!json.as_object().unwrap().contains_key("arguments"));
3988        assert!(!json.as_object().unwrap().contains_key("parameters"));
3989        // Standard fields must be present
3990        assert!(json.as_object().unwrap().contains_key("id"));
3991        assert!(json.as_object().unwrap().contains_key("type"));
3992        assert!(json.as_object().unwrap().contains_key("function"));
3993    }
3994
3995    #[test]
3996    fn tool_call_with_compat_fields_serializes_them() {
3997        // When compat fields are Some, they should appear in the output.
3998        let tc = ToolCall {
3999            id: None,
4000            kind: None,
4001            function: None,
4002            name: Some("shell".to_string()),
4003            arguments: Some("{\"command\":\"ls\"}".to_string()),
4004            parameters: None,
4005        };
4006        let json = serde_json::to_value(&tc).unwrap();
4007        assert_eq!(json["name"], "shell");
4008        assert_eq!(json["arguments"], "{\"command\":\"ls\"}");
4009        // None fields should be omitted
4010        assert!(!json.as_object().unwrap().contains_key("id"));
4011        assert!(!json.as_object().unwrap().contains_key("type"));
4012        assert!(!json.as_object().unwrap().contains_key("function"));
4013        assert!(!json.as_object().unwrap().contains_key("parameters"));
4014    }
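
    // Follows from the two cases above: every ToolCall field is skipped when
    // None, so a fully-empty value should serialize to an empty JSON object.
    // Sketch only; no claim that any provider accepts such a payload.
    #[test]
    fn tool_call_all_none_fields_serialize_to_empty_object() {
        let tc = ToolCall {
            id: None,
            kind: None,
            function: None,
            name: None,
            arguments: None,
            parameters: None,
        };
        let json = serde_json::to_value(&tc).unwrap();
        assert_eq!(json, serde_json::json!({}));
    }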
4015
4016    // ── parse_proxy_tool_event tests ──
4017
4018    #[test]
4019    fn proxy_tool_start_valid() {
4020        let line = r#"data: {"x_tool_start":{"name":"bash","arguments":"{\"cmd\":\"ls\"}"}}"#;
4021        let event = parse_proxy_tool_event(line);
4022        assert!(matches!(
4023            event,
4024            Some(StreamEvent::PreExecutedToolCall { ref name, ref args })
4025            if name == "bash" && args == r#"{"cmd":"ls"}"#
4026        ));
4027    }
4028
4029    #[test]
4030    fn proxy_tool_start_missing_name_returns_none() {
4031        let line = r#"data: {"x_tool_start":{"arguments":"{}"}}"#;
4032        assert!(parse_proxy_tool_event(line).is_none());
4033    }
4034
4035    #[test]
4036    fn proxy_tool_start_missing_arguments_defaults() {
4037        let line = r#"data: {"x_tool_start":{"name":"read"}}"#;
4038        let event = parse_proxy_tool_event(line);
4039        assert!(matches!(
4040            event,
4041            Some(StreamEvent::PreExecutedToolCall { ref name, ref args })
4042            if name == "read" && args == "{}"
4043        ));
4044    }
4045
4046    #[test]
4047    fn proxy_tool_result_valid() {
4048        let line = r#"data: {"x_tool_result":{"name":"bash","output":"hello world"}}"#;
4049        let event = parse_proxy_tool_event(line);
4050        assert!(matches!(
4051            event,
4052            Some(StreamEvent::PreExecutedToolResult { ref name, ref output })
4053            if name == "bash" && output == "hello world"
4054        ));
4055    }
4056
4057    #[test]
4058    fn proxy_tool_result_missing_fields_uses_defaults() {
4059        let line = r#"data: {"x_tool_result":{}}"#;
4060        let event = parse_proxy_tool_event(line);
4061        assert!(matches!(
4062            event,
4063            Some(StreamEvent::PreExecutedToolResult { ref name, ref output })
4064            if name == "unknown" && output.is_empty()
4065        ));
4066    }
4067
4068    #[test]
4069    fn proxy_tool_event_non_json_returns_none() {
4070        assert!(parse_proxy_tool_event("data: not json").is_none());
4071    }
4072
4073    #[test]
4074    fn proxy_tool_event_no_data_prefix_returns_none() {
4075        let line = r#"{"x_tool_start":{"name":"bash"}}"#;
4076        assert!(parse_proxy_tool_event(line).is_none());
4077    }
4078
4079    #[test]
4080    fn proxy_tool_event_standard_openai_chunk_returns_none() {
4081        let line = r#"data: {"id":"chatcmpl-1","choices":[{"delta":{"content":"hi"}}]}"#;
4082        assert!(parse_proxy_tool_event(line).is_none());
4083    }
4084
4085    #[test]
4086    fn proxy_tool_event_done_sentinel_returns_none() {
4087        assert!(parse_proxy_tool_event("data: [DONE]").is_none());
4088    }
4089}