construct/providers/compatible.rs

//! Generic OpenAI-compatible provider.
//! Most LLM APIs follow the same `/v1/chat/completions` format.
//! This module provides a single implementation that works for all of them.

use crate::multimodal;
use crate::providers::traits::{
    ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse,
    Provider, StreamChunk, StreamError, StreamEvent, StreamOptions, StreamResult, TokenUsage,
    ToolCall as ProviderToolCall,
};
use async_trait::async_trait;
use futures_util::{StreamExt, stream};
use reqwest::{
    Client,
    header::{HeaderMap, HeaderValue, USER_AGENT},
};
use serde::{Deserialize, Serialize};

/// A provider that speaks the OpenAI-compatible chat completions API.
/// Used by: Venice, Vercel AI Gateway, Cloudflare AI Gateway, Moonshot,
/// Synthetic, `OpenCode` Zen, `OpenCode` Go, `Z.AI`, `GLM`, `MiniMax`, Bedrock, Qianfan, Groq, Mistral, `xAI`, etc.
#[allow(clippy::struct_excessive_bools)]
pub struct OpenAiCompatibleProvider {
    pub(crate) name: String,
    pub(crate) base_url: String,
    pub(crate) credential: Option<String>,
    pub(crate) auth_header: AuthStyle,
    supports_vision: bool,
    /// When false, do not fall back to /v1/responses on chat completions 404.
    /// GLM/Zhipu does not support the responses API.
    supports_responses_fallback: bool,
    user_agent: Option<String>,
    /// When true, collect all `system` messages and prepend their content
    /// to the first `user` message, then drop the system messages.
    /// Required for providers that reject `role: system` (e.g. MiniMax).
    merge_system_into_user: bool,
    /// Whether this provider supports OpenAI-style native tool calling.
    /// When false, tools are injected into the system prompt as text.
    native_tool_calling: bool,
    /// HTTP request timeout in seconds for LLM API calls. Default: 120.
    timeout_secs: u64,
    /// Extra HTTP headers to include in all API requests.
    extra_headers: std::collections::HashMap<String, String>,
    /// Optional reasoning effort for GPT-5/Codex-compatible backends.
    reasoning_effort: Option<String>,
    /// Custom API path suffix (e.g. "/v2/generate").
    /// When set, overrides the default `/chat/completions` path detection.
    api_path: Option<String>,
    /// Maximum output tokens to include in API requests.
    max_tokens: Option<u32>,
}

/// How the provider expects the API key to be sent.
#[derive(Debug, Clone)]
pub enum AuthStyle {
    /// `Authorization: Bearer <key>`
    Bearer,
    /// `x-api-key: <key>` (used by some Chinese providers)
    XApiKey,
    /// Custom header name
    Custom(String),
}

impl OpenAiCompatibleProvider {
    pub fn new(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, true, None, false,
        )
    }

    pub fn new_with_vision(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        supports_vision: bool,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            supports_vision,
            true,
            None,
            false,
        )
    }

    /// Same as `new` but skips the /v1/responses fallback on 404.
    /// Use for providers (e.g. GLM) that only support chat completions.
    pub fn new_no_responses_fallback(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, false, None, false,
        )
    }

    /// Create a provider with a custom User-Agent header.
    ///
    /// Some providers (for example Kimi Code) require a specific User-Agent
    /// for request routing and policy enforcement.
    pub fn new_with_user_agent(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        user_agent: &str,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            false,
            true,
            Some(user_agent),
            false,
        )
    }

    pub fn new_with_user_agent_and_vision(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        user_agent: &str,
        supports_vision: bool,
    ) -> Self {
        Self::new_with_options(
            name,
            base_url,
            credential,
            auth_style,
            supports_vision,
            true,
            Some(user_agent),
            false,
        )
    }

    /// For providers that do not support `role: system` (e.g. MiniMax).
    /// System prompt content is prepended to the first user message instead.
    pub fn new_merge_system_into_user(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
    ) -> Self {
        Self::new_with_options(
            name, base_url, credential, auth_style, false, false, None, true,
        )
    }

    fn new_with_options(
        name: &str,
        base_url: &str,
        credential: Option<&str>,
        auth_style: AuthStyle,
        supports_vision: bool,
        supports_responses_fallback: bool,
        user_agent: Option<&str>,
        merge_system_into_user: bool,
    ) -> Self {
        Self {
            name: name.to_string(),
            base_url: base_url.trim_end_matches('/').to_string(),
            credential: credential.map(ToString::to_string),
            auth_header: auth_style,
            supports_vision,
            supports_responses_fallback,
            user_agent: user_agent.map(ToString::to_string),
            merge_system_into_user,
            native_tool_calling: !merge_system_into_user,
            timeout_secs: 120,
            extra_headers: std::collections::HashMap::new(),
            reasoning_effort: None,
            api_path: None,
            max_tokens: None,
        }
    }

    /// Disable native tool calling, forcing prompt-guided tool use instead.
    pub fn without_native_tools(mut self) -> Self {
        self.native_tool_calling = false;
        self
    }

    /// Override the HTTP request timeout for LLM API calls.
    pub fn with_timeout_secs(mut self, timeout_secs: u64) -> Self {
        self.timeout_secs = timeout_secs;
        self
    }

    /// Set extra HTTP headers to include in all API requests.
    pub fn with_extra_headers(
        mut self,
        headers: std::collections::HashMap<String, String>,
    ) -> Self {
        self.extra_headers = headers;
        self
    }

    /// Set reasoning effort for GPT-5/Codex-compatible chat-completions APIs.
    pub fn with_reasoning_effort(mut self, reasoning_effort: Option<String>) -> Self {
        self.reasoning_effort = reasoning_effort;
        self
    }

    /// Set a custom API path suffix for this provider.
    /// When set, replaces the default `/chat/completions` path.
    pub fn with_api_path(mut self, api_path: Option<String>) -> Self {
        self.api_path = api_path;
        self
    }

    /// Set the maximum output tokens for API requests.
    pub fn with_max_tokens(mut self, max_tokens: Option<u32>) -> Self {
        self.max_tokens = max_tokens;
        self
    }

    /// Collect all `system` role messages, concatenate their content,
    /// and prepend to the first `user` message. Drop all system messages.
    /// Used for providers (e.g. MiniMax) that reject `role: system`.
    fn flatten_system_messages(messages: &[ChatMessage]) -> Vec<ChatMessage> {
        let system_content: String = messages
            .iter()
            .filter(|m| m.role == "system")
            .map(|m| m.content.as_str())
            .collect::<Vec<_>>()
            .join("\n\n");

        if system_content.is_empty() {
            return messages.to_vec();
        }

        let mut result: Vec<ChatMessage> = messages
            .iter()
            .filter(|m| m.role != "system")
            .cloned()
            .collect();

        if let Some(first_user) = result.iter_mut().find(|m| m.role == "user") {
            first_user.content = format!("{system_content}\n\n{}", first_user.content);
        } else {
            // No user message found: insert a synthetic user message with system content
            result.insert(0, ChatMessage::user(&system_content));
        }

        result
    }

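    /// Build the HTTP client used for API calls: applies the request and connect
    /// timeouts, any custom User-Agent, extra headers, and the runtime proxy settings.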
    fn http_client(&self) -> Client {
        let timeout = self.timeout_secs;
        let has_user_agent = self.user_agent.is_some();
        let has_extra_headers = !self.extra_headers.is_empty();

        if has_user_agent || has_extra_headers {
            let mut headers = HeaderMap::new();
            if let Some(ua) = self.user_agent.as_deref() {
                if let Ok(value) = HeaderValue::from_str(ua) {
                    headers.insert(USER_AGENT, value);
                }
            }
            for (key, value) in &self.extra_headers {
                match (
                    reqwest::header::HeaderName::from_bytes(key.as_bytes()),
                    HeaderValue::from_str(value),
                ) {
                    (Ok(name), Ok(val)) => {
                        headers.insert(name, val);
                    }
                    _ => {
                        tracing::warn!(header = key, "Skipping invalid extra header name or value");
                    }
                }
            }

            let builder = Client::builder()
                .timeout(std::time::Duration::from_secs(timeout))
                .connect_timeout(std::time::Duration::from_secs(10))
                .default_headers(headers);
            let builder =
                crate::config::apply_runtime_proxy_to_builder(builder, "provider.compatible");

            return builder.build().unwrap_or_else(|error| {
                tracing::warn!(
                    "Failed to build proxied timeout client with custom headers: {error}"
                );
                Client::new()
            });
        }

        crate::config::build_runtime_proxy_client_with_timeouts("provider.compatible", timeout, 10)
    }

    /// Build the full URL for chat completions, detecting if base_url already includes the path.
    /// This allows custom providers with non-standard endpoints (e.g., VolcEngine ARK uses
    /// `/api/coding/v3/chat/completions` instead of `/v1/chat/completions`).
    fn chat_completions_url(&self) -> String {
        // If a custom api_path is configured, use it directly.
        if let Some(ref api_path) = self.api_path {
            let separator = if api_path.starts_with('/') { "" } else { "/" };
            return format!("{}{separator}{api_path}", self.base_url);
        }

        let has_full_endpoint = reqwest::Url::parse(&self.base_url)
            .map(|url| {
                url.path()
                    .trim_end_matches('/')
                    .ends_with("/chat/completions")
            })
            .unwrap_or_else(|_| {
                self.base_url
                    .trim_end_matches('/')
                    .ends_with("/chat/completions")
            });

        if has_full_endpoint {
            self.base_url.clone()
        } else {
            format!("{}/chat/completions", self.base_url)
        }
    }

    fn path_ends_with(&self, suffix: &str) -> bool {
        if let Ok(url) = reqwest::Url::parse(&self.base_url) {
            return url.path().trim_end_matches('/').ends_with(suffix);
        }

        self.base_url.trim_end_matches('/').ends_with(suffix)
    }

    fn has_explicit_api_path(&self) -> bool {
        let Ok(url) = reqwest::Url::parse(&self.base_url) else {
            return false;
        };

        let path = url.path().trim_end_matches('/');
        !path.is_empty() && path != "/"
    }

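    /// Z.AI endpoints require `tool_stream: true` on requests that carry tools;
    /// detect them by API host (`api.z.ai` / `*.z.ai`) or by provider name.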
    fn requires_tool_stream(&self) -> bool {
        let host_requires_tool_stream = reqwest::Url::parse(&self.base_url)
            .ok()
            .and_then(|url| url.host_str().map(str::to_ascii_lowercase))
            .is_some_and(|host| host == "api.z.ai" || host.ends_with(".z.ai"));

        host_requires_tool_stream || matches!(self.name.as_str(), "zai" | "z.ai")
    }

    fn tool_stream_for_tools(&self, has_tools: bool) -> Option<bool> {
        if has_tools && self.requires_tool_stream() {
            Some(true)
        } else {
            None
        }
    }

    /// Build the full URL for responses API, detecting if base_url already includes the path.
    fn responses_url(&self) -> String {
        if self.path_ends_with("/responses") {
            return self.base_url.clone();
        }

        let normalized_base = self.base_url.trim_end_matches('/');

        // If chat endpoint is explicitly configured, derive sibling responses endpoint.
        if let Some(prefix) = normalized_base.strip_suffix("/chat/completions") {
            return format!("{prefix}/responses");
        }

        // If an explicit API path already exists (e.g. /v1, /openai, /api/coding/v3),
        // append responses directly to avoid duplicate /v1 segments.
        if self.has_explicit_api_path() {
            format!("{normalized_base}/responses")
        } else {
            format!("{normalized_base}/v1/responses")
        }
    }

    fn tool_specs_to_openai_format(tools: &[crate::tools::ToolSpec]) -> Vec<serde_json::Value> {
        tools
            .iter()
            .map(|tool| {
                serde_json::json!({
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": tool.description,
                        "parameters": tool.parameters
                    }
                })
            })
            .collect()
    }

    fn reasoning_effort_for_model(&self, model: &str) -> Option<String> {
        let id = model.rsplit('/').next().unwrap_or(model);
        let supports_reasoning_effort = id.starts_with("gpt-5") || id.contains("codex");
        supports_reasoning_effort
            .then(|| self.reasoning_effort.clone())
            .flatten()
    }
}
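
// A minimal, test-only sketch of the builder flow and endpoint derivation above.
// The provider name, base URL, key, and api_path values are illustrative only,
// not real endpoints.
#[cfg(test)]
mod provider_builder_tests {
    use super::*;

    #[test]
    fn derives_endpoint_urls_from_base_url() {
        let provider = OpenAiCompatibleProvider::new(
            "example",
            "https://llm.example.com/v1/",
            Some("test-key"),
            AuthStyle::Bearer,
        )
        .with_timeout_secs(60)
        .with_max_tokens(Some(4096));

        // The trailing slash is trimmed and the standard chat path is appended.
        assert_eq!(
            provider.chat_completions_url(),
            "https://llm.example.com/v1/chat/completions"
        );
        // With an explicit /v1 path, /responses is appended directly to avoid
        // a duplicated /v1 segment.
        assert_eq!(
            provider.responses_url(),
            "https://llm.example.com/v1/responses"
        );
    }

    #[test]
    fn custom_api_path_overrides_chat_completions() {
        let provider = OpenAiCompatibleProvider::new(
            "example",
            "https://llm.example.com",
            None,
            AuthStyle::XApiKey,
        )
        .with_api_path(Some("/v2/generate".to_string()));

        assert_eq!(
            provider.chat_completions_url(),
            "https://llm.example.com/v2/generate"
        );
    }
}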

#[derive(Debug, Serialize)]
struct ApiChatRequest {
    model: String,
    messages: Vec<Message>,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<StreamOptionsPayload>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}

/// OpenAI chat-completions `stream_options` payload. Opting in to
/// `include_usage` instructs the server to emit a final chunk carrying the
/// token usage after the text stream completes.
#[derive(Debug, Serialize)]
struct StreamOptionsPayload {
    include_usage: bool,
}

#[derive(Debug, Serialize)]
struct Message {
    role: String,
    content: MessageContent,
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
enum MessageContent {
    Text(String),
    Parts(Vec<MessagePart>),
}

#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum MessagePart {
    Text { text: String },
    ImageUrl { image_url: ImageUrlPart },
}

#[derive(Debug, Serialize)]
struct ImageUrlPart {
    url: String,
}

#[derive(Debug, Deserialize)]
struct ApiChatResponse {
    choices: Vec<Choice>,
    #[serde(default)]
    usage: Option<UsageInfo>,
}

#[derive(Debug, Deserialize)]
struct UsageInfo {
    #[serde(default)]
    prompt_tokens: Option<u64>,
    #[serde(default)]
    completion_tokens: Option<u64>,
}

#[derive(Debug, Deserialize)]
struct Choice {
    message: ResponseMessage,
}

/// Remove `<think>...</think>` blocks from model output.
/// Some reasoning models (e.g. MiniMax) embed their chain-of-thought inline
/// in the `content` field rather than a separate `reasoning_content` field.
/// The resulting `<think>` tags must be stripped before returning to the user.
fn strip_think_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;
    loop {
        if let Some(start) = rest.find("<think>") {
            result.push_str(&rest[..start]);
            if let Some(end) = rest[start..].find("</think>") {
                rest = &rest[start + end + "</think>".len()..];
            } else {
                // Unclosed tag: drop the rest to avoid leaking partial reasoning.
                break;
            }
        } else {
            result.push_str(rest);
            break;
        }
    }
    result.trim().to_string()
}
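
// A small, test-only sketch of the stripping behavior above: closed <think>
// blocks are removed, and an unclosed block drops the remainder of the text.
#[cfg(test)]
mod strip_think_tags_tests {
    use super::strip_think_tags;

    #[test]
    fn removes_closed_and_unclosed_think_blocks() {
        assert_eq!(
            strip_think_tags("<think>internal reasoning</think>Hello"),
            "Hello"
        );
        // Unclosed tag: everything from <think> onward is dropped.
        assert_eq!(strip_think_tags("Answer<think>partial"), "Answer");
    }
}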

#[derive(Debug, Deserialize, Serialize)]
struct ResponseMessage {
    #[serde(default)]
    content: Option<String>,
    /// Reasoning/thinking models (e.g. Qwen3, GLM-4) may return their output
    /// in `reasoning_content` instead of `content`. Used as automatic fallback.
    #[serde(default)]
    reasoning_content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<ToolCall>>,
}

impl ResponseMessage {
    /// Extract text content, falling back to `reasoning_content` when `content`
    /// is missing or empty. Reasoning/thinking models (Qwen3, GLM-4, etc.)
    /// often return their output solely in `reasoning_content`.
    /// Strips `<think>...</think>` blocks that some models (e.g. MiniMax) embed
    /// inline in `content` instead of using a separate field.
    fn effective_content(&self) -> String {
        if let Some(content) = self.content.as_ref().filter(|c| !c.is_empty()) {
            let stripped = strip_think_tags(content);
            if !stripped.is_empty() {
                return stripped;
            }
        }

        self.reasoning_content
            .as_ref()
            .map(|c| strip_think_tags(c))
            .filter(|c| !c.is_empty())
            .unwrap_or_default()
    }

    fn effective_content_optional(&self) -> Option<String> {
        if let Some(content) = self.content.as_ref().filter(|c| !c.is_empty()) {
            let stripped = strip_think_tags(content);
            if !stripped.is_empty() {
                return Some(stripped);
            }
        }

        self.reasoning_content
            .as_ref()
            .map(|c| strip_think_tags(c))
            .filter(|c| !c.is_empty())
    }
}
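
// Test-only sketch of the content fallback above: when `content` holds only a
// <think> block, the text in `reasoning_content` is returned instead.
#[cfg(test)]
mod effective_content_tests {
    use super::ResponseMessage;

    #[test]
    fn falls_back_to_reasoning_content() {
        let message = ResponseMessage {
            content: Some("<think>only reasoning</think>".to_string()),
            reasoning_content: Some("final answer".to_string()),
            tool_calls: None,
        };
        assert_eq!(message.effective_content(), "final answer");
    }
}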

#[derive(Debug, Deserialize, Serialize)]
struct ToolCall {
    #[serde(skip_serializing_if = "Option::is_none")]
    id: Option<String>,
    #[serde(rename = "type")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    function: Option<Function>,

    // Compatibility: Some providers (e.g., older GLM) may use 'name' directly
    #[serde(default, skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    arguments: Option<String>,

    // Compatibility: DeepSeek sometimes wraps arguments differently
    #[serde(
        rename = "parameters",
        default,
        skip_serializing_if = "Option::is_none"
    )]
    parameters: Option<serde_json::Value>,
}

impl ToolCall {
    /// Extract function name with fallback logic for various provider formats
    fn function_name(&self) -> Option<String> {
        // Standard OpenAI format: tool_calls[].function.name
        if let Some(ref func) = self.function {
            if let Some(ref name) = func.name {
                return Some(name.clone());
            }
        }
        // Fallback: direct name field
        self.name.clone()
    }

    /// Extract arguments with fallback logic and type conversion
    fn function_arguments(&self) -> Option<String> {
        // Standard OpenAI format: tool_calls[].function.arguments (string)
        if let Some(ref func) = self.function {
            if let Some(ref args) = func.arguments {
                return Some(args.clone());
            }
        }
        // Fallback: direct arguments field
        if let Some(ref args) = self.arguments {
            return Some(args.clone());
        }
        // Compatibility: Some providers return parameters as object instead of string
        if let Some(ref params) = self.parameters {
            return serde_json::to_string(params).ok();
        }
        None
    }
}

#[derive(Debug, Deserialize, Serialize)]
struct Function {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Serialize)]
struct NativeChatRequest {
    model: String,
    messages: Vec<NativeMessage>,
    temperature: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream_options: Option<StreamOptionsPayload>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}

#[derive(Debug, Serialize)]
struct NativeMessage {
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<MessageContent>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ToolCall>>,
    /// Raw reasoning content from thinking models; pass-through for providers
    /// that require it in assistant tool-call history messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_content: Option<String>,
}

#[derive(Debug, Serialize)]
struct ResponsesRequest {
    model: String,
    input: Vec<ResponsesInput>,
    #[serde(skip_serializing_if = "Option::is_none")]
    instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
}

#[derive(Debug, Serialize)]
struct ResponsesInput {
    role: String,
    content: ResponsesInputContent,
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    kind: Option<String>,
}

#[derive(Debug, Serialize)]
#[serde(untagged)]
enum ResponsesInputContent {
    Text(String),
    Parts(Vec<ResponsesInputPart>),
}

#[derive(Debug, Serialize)]
struct ResponsesInputPart {
    #[serde(rename = "type")]
    kind: String,
    text: String,
}

impl ResponsesInput {
    fn user_text(content: String) -> Self {
        Self {
            role: "user".to_string(),
            content: ResponsesInputContent::Text(content),
            kind: None,
        }
    }

    fn assistant_output_text(content: String) -> Self {
        Self {
            role: "assistant".to_string(),
            content: ResponsesInputContent::Parts(vec![ResponsesInputPart {
                kind: "output_text".to_string(),
                text: content,
            }]),
            kind: Some("message".to_string()),
        }
    }
}

#[derive(Debug, Deserialize)]
struct ResponsesResponse {
    #[serde(default)]
    output: Vec<ResponsesOutput>,
    #[serde(default)]
    output_text: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ResponsesOutput {
    #[serde(default)]
    content: Vec<ResponsesContent>,
}

#[derive(Debug, Deserialize)]
struct ResponsesContent {
    #[serde(rename = "type")]
    kind: Option<String>,
    text: Option<String>,
}

// ---------------------------------------------------------------
// Streaming support (SSE parser)
// ---------------------------------------------------------------

/// Server-Sent Event stream chunk for OpenAI-compatible streaming.
#[derive(Debug, Deserialize)]
struct StreamChunkResponse {
    #[serde(default)]
    choices: Vec<StreamChoice>,
    /// Final usage chunk from servers that honor `stream_options.include_usage`.
    /// Earlier chunks have `choices` populated and `usage` null; the trailing
    /// usage chunk flips this — `choices` is empty and `usage` carries totals.
    #[serde(default)]
    usage: Option<StreamUsageInfo>,
}

#[derive(Debug, Deserialize)]
struct StreamUsageInfo {
    #[serde(default)]
    prompt_tokens: Option<u64>,
    #[serde(default)]
    completion_tokens: Option<u64>,
    #[serde(default)]
    prompt_tokens_details: Option<StreamPromptTokensDetails>,
}

#[derive(Debug, Deserialize)]
struct StreamPromptTokensDetails {
    #[serde(default)]
    cached_tokens: Option<u64>,
}

#[derive(Debug, Deserialize)]
struct StreamChoice {
    #[serde(default)]
    delta: StreamDelta,
    #[serde(default)]
    finish_reason: Option<String>,
}

#[derive(Debug, Deserialize, Default)]
struct StreamDelta {
    #[serde(default)]
    content: Option<String>,
    /// Reasoning/thinking models may stream output via `reasoning_content`.
    #[serde(default)]
    reasoning_content: Option<String>,
    /// Native tool-calling deltas in OpenAI chat-completions streaming format.
    #[serde(default)]
    tool_calls: Option<Vec<StreamToolCallDelta>>,
}

#[derive(Debug, Deserialize)]
struct StreamToolCallDelta {
    #[serde(default)]
    index: Option<usize>,
    #[serde(default)]
    id: Option<String>,
    #[serde(default)]
    function: Option<StreamFunctionDelta>,
    // Compatibility: some providers stream name/arguments at top-level.
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Deserialize)]
struct StreamFunctionDelta {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Default)]
struct StreamToolCallAccumulator {
    id: Option<String>,
    name: Option<String>,
    arguments: String,
}

impl StreamToolCallAccumulator {
    fn apply_delta(&mut self, delta: &StreamToolCallDelta) {
        if let Some(id) = delta.id.as_ref().filter(|value| !value.is_empty()) {
            self.id = Some(id.clone());
        }

        let delta_name = delta
            .function
            .as_ref()
            .and_then(|function| function.name.as_ref())
            .or(delta.name.as_ref())
            .filter(|value| !value.is_empty());
        if let Some(name) = delta_name {
            self.name = Some(name.clone());
        }

        if let Some(arguments_delta) = delta
            .function
            .as_ref()
            .and_then(|function| function.arguments.as_ref())
            .or(delta.arguments.as_ref())
            .filter(|value| !value.is_empty())
        {
            self.arguments.push_str(arguments_delta);
        }
    }

    fn into_provider_tool_call(self) -> Option<ProviderToolCall> {
        let name = self.name?;
        let arguments = if self.arguments.trim().is_empty() {
            "{}".to_string()
        } else {
            self.arguments
        };
        let normalized_arguments = if serde_json::from_str::<serde_json::Value>(&arguments).is_ok()
        {
            arguments
        } else {
            tracing::warn!(
                function = %name,
                arguments = %arguments,
                "Invalid JSON in streamed native tool-call arguments, using empty object"
            );
            "{}".to_string()
        };

        Some(ProviderToolCall {
            id: self.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
            name,
            arguments: normalized_arguments,
        })
    }
}
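
// Test-only sketch of how streamed tool-call fragments are merged: the id and
// function name arrive on the first delta and argument text is concatenated
// across later deltas. The "read_file" tool and its arguments are made up.
#[cfg(test)]
mod stream_tool_call_accumulator_tests {
    use super::*;

    #[test]
    fn accumulates_argument_fragments_across_deltas() {
        let mut acc = StreamToolCallAccumulator::default();
        acc.apply_delta(&StreamToolCallDelta {
            index: Some(0),
            id: Some("call_1".to_string()),
            function: Some(StreamFunctionDelta {
                name: Some("read_file".to_string()),
                arguments: Some("{\"path\":".to_string()),
            }),
            name: None,
            arguments: None,
        });
        acc.apply_delta(&StreamToolCallDelta {
            index: Some(0),
            id: None,
            function: Some(StreamFunctionDelta {
                name: None,
                arguments: Some("\"a.txt\"}".to_string()),
            }),
            name: None,
            arguments: None,
        });

        let call = acc
            .into_provider_tool_call()
            .expect("a complete tool call should be produced");
        assert_eq!(call.name, "read_file");
        assert_eq!(call.arguments, "{\"path\":\"a.txt\"}");
    }
}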

fn parse_sse_chunk(line: &str) -> StreamResult<Option<StreamChunkResponse>> {
    let line = line.trim();

    if line.is_empty() || line.starts_with(':') {
        return Ok(None);
    }

    let Some(data) = line.strip_prefix("data:") else {
        return Ok(None);
    };
    let data = data.trim();

    if data == "[DONE]" {
        return Ok(None);
    }

    serde_json::from_str(data)
        .map(Some)
        .map_err(StreamError::Json)
}
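
// Test-only sketch of the SSE chunk handling above: a `data:` payload with a
// content delta is parsed, while comments and the [DONE] sentinel yield nothing.
#[cfg(test)]
mod sse_chunk_parsing_tests {
    use super::parse_sse_chunk;

    #[test]
    fn parses_content_delta_and_skips_sentinels() {
        let line = r#"data: {"choices":[{"delta":{"content":"Hi"}}]}"#;
        let chunk = match parse_sse_chunk(line) {
            Ok(Some(chunk)) => chunk,
            _ => panic!("expected a parsed chunk"),
        };
        assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("Hi"));

        assert!(matches!(parse_sse_chunk("data: [DONE]"), Ok(None)));
        assert!(matches!(parse_sse_chunk(": keep-alive"), Ok(None)));
    }
}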

/// Parse custom proxy tool events from SSE lines.
/// These are emitted by proxies like claude-max-api-proxy that execute tools
/// internally and forward observability events via custom SSE fields.
fn parse_proxy_tool_event(line: &str) -> Option<StreamEvent> {
    let data = line.trim().strip_prefix("data:")?.trim();
    let obj: serde_json::Value = serde_json::from_str(data).ok()?;

    if let Some(ts) = obj.get("x_tool_start") {
        let Some(name) = ts.get("name").and_then(|v| v.as_str()) else {
            tracing::debug!("proxy x_tool_start event missing required 'name' field");
            return None;
        };
        let name = name.to_string();
        let args = ts
            .get("arguments")
            .and_then(|v| v.as_str())
            .unwrap_or("{}")
            .to_string();
        return Some(StreamEvent::PreExecutedToolCall { name, args });
    }

    if let Some(tr) = obj.get("x_tool_result") {
        let name = tr
            .get("name")
            .and_then(|v| v.as_str())
            .unwrap_or("unknown")
            .to_string();
        let output = tr
            .get("output")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        return Some(StreamEvent::PreExecutedToolResult { name, output });
    }

    None
}
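
// Test-only sketch of the proxy tool-event parsing above. The tool name and
// arguments in the payload are illustrative; only the x_tool_start field shape
// matters here.
#[cfg(test)]
mod proxy_tool_event_tests {
    use super::*;

    #[test]
    fn parses_x_tool_start_event() {
        let line = r#"data: {"x_tool_start":{"name":"read_file","arguments":"{}"}}"#;
        match parse_proxy_tool_event(line) {
            Some(StreamEvent::PreExecutedToolCall { name, args }) => {
                assert_eq!(name, "read_file");
                assert_eq!(args, "{}");
            }
            _ => panic!("expected a PreExecutedToolCall event"),
        }
    }
}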

fn extract_sse_text_delta(choice: &StreamChoice) -> Option<String> {
    if let Some(content) = &choice.delta.content {
        if !content.is_empty() {
            return Some(content.clone());
        }
    }

    choice
        .delta
        .reasoning_content
        .as_ref()
        .filter(|value| !value.is_empty())
        .cloned()
}

/// Parse SSE (Server-Sent Events) stream from OpenAI-compatible providers.
/// Handles the `data: {...}` format and `[DONE]` sentinel.
///
/// Returns a `StreamChunk` that distinguishes content from reasoning:
/// - Content deltas → `StreamChunk::delta`
/// - Reasoning deltas → `StreamChunk::reasoning`
fn parse_sse_line(line: &str) -> StreamResult<Option<StreamChunk>> {
    let chunk = match parse_sse_chunk(line)? {
        Some(c) => c,
        None => return Ok(None),
    };

    if let Some(choice) = chunk.choices.first() {
        if let Some(content) = &choice.delta.content {
            if !content.is_empty() {
                return Ok(Some(StreamChunk::delta(content.clone())));
            }
        }
        if let Some(reasoning) = &choice.delta.reasoning_content {
            if !reasoning.is_empty() {
                return Ok(Some(StreamChunk::reasoning(reasoning.clone())));
            }
        }
    }

    Ok(None)
}

/// Convert SSE byte stream to text chunks.
fn sse_bytes_to_chunks(
    response: reqwest::Response,
    count_tokens: bool,
) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
    let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);

    tokio::spawn(async move {
        let mut buffer = String::new();

        match response.error_for_status_ref() {
            Ok(_) => {}
            Err(e) => {
                let _ = tx.send(Err(StreamError::Http(e))).await;
                return;
            }
        }

        let mut bytes_stream = response.bytes_stream();
        // Accumulate partial UTF-8 sequences that may be split across
        // HTTP/1.1 chunked transfer boundaries (e.g. 3-byte CJK chars).
        let mut utf8_buf: Vec<u8> = Vec::new();

        while let Some(item) = bytes_stream.next().await {
            match item {
                Ok(bytes) => {
                    utf8_buf.extend_from_slice(&bytes);
                    let text = match std::str::from_utf8(&utf8_buf) {
                        Ok(s) => {
                            let owned = s.to_string();
                            utf8_buf.clear();
                            owned
                        }
                        Err(e) => {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to == 0 && utf8_buf.len() < 4 {
                                // Could still be an incomplete multi-byte char; wait for more data
                                continue;
                            }
                            let valid =
                                String::from_utf8_lossy(&utf8_buf[..valid_up_to]).into_owned();
                            utf8_buf.drain(..valid_up_to);
                            valid
                        }
                    };
                    if text.is_empty() {
                        continue;
                    }

                    buffer.push_str(&text);

                    while let Some(pos) = buffer.find('\n') {
                        let line = buffer[..pos].to_string();
                        buffer.drain(..=pos);

                        match parse_sse_line(&line) {
                            Ok(Some(chunk)) => {
                                let chunk = if count_tokens {
                                    chunk.with_token_estimate()
                                } else {
                                    chunk
                                };
                                if tx.send(Ok(chunk)).await.is_err() {
                                    return; // Receiver dropped
                                }
                            }
                            Ok(None) => {}
                            Err(e) => {
                                let _ = tx.send(Err(e)).await;
                                return;
                            }
                        }
                    }
                }
                Err(e) => {
                    let _ = tx.send(Err(StreamError::Http(e))).await;
                    return;
                }
            }
        }

        let _ = tx.send(Ok(StreamChunk::final_chunk())).await;
    });

    stream::unfold(rx, |mut rx| async {
        rx.recv().await.map(|chunk| (chunk, rx))
    })
    .boxed()
}

/// Convert SSE byte stream to structured streaming events.
fn sse_bytes_to_events(
    response: reqwest::Response,
    count_tokens: bool,
) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
    let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(100);

    tokio::spawn(async move {
        let mut buffer = String::new();
        let mut tool_calls: Vec<StreamToolCallAccumulator> = Vec::new();
        let mut emitted_tool_calls = false;

        match response.error_for_status_ref() {
            Ok(_) => {}
            Err(e) => {
                let _ = tx.send(Err(StreamError::Http(e))).await;
                return;
            }
        }

        let mut bytes_stream = response.bytes_stream();
        // Accumulate partial UTF-8 sequences split across chunk boundaries.
        let mut utf8_buf: Vec<u8> = Vec::new();
        while let Some(item) = bytes_stream.next().await {
            match item {
                Ok(bytes) => {
                    utf8_buf.extend_from_slice(&bytes);
                    let text = match std::str::from_utf8(&utf8_buf) {
                        Ok(s) => {
                            let owned = s.to_string();
                            utf8_buf.clear();
                            owned
                        }
                        Err(e) => {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to == 0 && utf8_buf.len() < 4 {
                                continue;
                            }
                            let valid =
                                String::from_utf8_lossy(&utf8_buf[..valid_up_to]).into_owned();
                            utf8_buf.drain(..valid_up_to);
                            valid
                        }
                    };
                    if text.is_empty() {
                        continue;
                    }

                    buffer.push_str(&text);

                    while let Some(pos) = buffer.find('\n') {
                        let line = buffer[..pos].to_string();
                        buffer.drain(..=pos);

                        // Custom proxy events for pre-executed tool calls
                        // (e.g. claude-max-api-proxy streaming x_tool_start/x_tool_result)
                        if let Some(event) = parse_proxy_tool_event(&line) {
                            if tx.send(Ok(event)).await.is_err() {
                                return;
                            }
                            continue;
                        }

                        let chunk = match parse_sse_chunk(&line) {
                            Ok(Some(chunk)) => chunk,
                            Ok(None) => continue,
                            Err(e) => {
                                let _ = tx.send(Err(e)).await;
                                return;
                            }
                        };

                        if let Some(usage) = chunk.usage.as_ref() {
                            let token_usage = crate::providers::traits::TokenUsage {
                                input_tokens: usage.prompt_tokens,
                                output_tokens: usage.completion_tokens,
                                cached_input_tokens: usage
                                    .prompt_tokens_details
                                    .as_ref()
                                    .and_then(|d| d.cached_tokens),
                            };
                            if tx.send(Ok(StreamEvent::Usage(token_usage))).await.is_err() {
                                return;
                            }
                        }

                        let mut should_emit_tool_calls = false;
                        for choice in &chunk.choices {
                            if let Some(text_delta) = extract_sse_text_delta(choice) {
                                let mut text_chunk = StreamChunk::delta(text_delta);
                                if count_tokens {
                                    text_chunk = text_chunk.with_token_estimate();
                                }
                                if tx
                                    .send(Ok(StreamEvent::TextDelta(text_chunk)))
                                    .await
                                    .is_err()
                                {
                                    return;
                                }
                            }

                            if let Some(deltas) = choice.delta.tool_calls.as_ref() {
                                for delta in deltas {
                                    let index = delta.index.unwrap_or(tool_calls.len());
                                    if index >= tool_calls.len() {
                                        tool_calls.resize_with(index + 1, Default::default);
                                    }
                                    if let Some(acc) = tool_calls.get_mut(index) {
                                        acc.apply_delta(delta);
                                    }
                                }
                            }

                            if choice.finish_reason.as_deref() == Some("tool_calls") {
                                should_emit_tool_calls = true;
                            }
                        }

                        if should_emit_tool_calls && !emitted_tool_calls {
                            emitted_tool_calls = true;
                            for tool_call in tool_calls
                                .drain(..)
                                .filter_map(StreamToolCallAccumulator::into_provider_tool_call)
                            {
                                if tx.send(Ok(StreamEvent::ToolCall(tool_call))).await.is_err() {
                                    return;
                                }
                            }
                        }
                    }
                }
                Err(e) => {
                    let _ = tx.send(Err(StreamError::Http(e))).await;
                    return;
                }
            }
        }

        if !emitted_tool_calls {
            for tool_call in tool_calls
                .drain(..)
                .filter_map(StreamToolCallAccumulator::into_provider_tool_call)
            {
                if tx.send(Ok(StreamEvent::ToolCall(tool_call))).await.is_err() {
                    return;
                }
            }
        }

        let _ = tx.send(Ok(StreamEvent::Final)).await;
    });

    stream::unfold(rx, |mut rx| async move {
        rx.recv().await.map(|event| (event, rx))
    })
    .boxed()
}

fn first_nonempty(text: Option<&str>) -> Option<String> {
    text.and_then(|value| {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    })
}

fn build_responses_prompt(messages: &[ChatMessage]) -> (Option<String>, Vec<ResponsesInput>) {
    let mut instructions_parts = Vec::new();
    let mut input = Vec::new();

    for message in messages {
        if message.content.trim().is_empty() {
            continue;
        }

        if message.role == "system" {
            instructions_parts.push(message.content.clone());
            continue;
        }

        let input_item = match message.role.as_str() {
            // llama.cpp Responses parser expects assistant history items in
            // "output_message" shape (`type=message`, `output_text` parts).
            "assistant" | "tool" => ResponsesInput::assistant_output_text(message.content.clone()),
            _ => ResponsesInput::user_text(message.content.clone()),
        };
        input.push(input_item);
    }

    let instructions = if instructions_parts.is_empty() {
        None
    } else {
        Some(instructions_parts.join("\n\n"))
    };

    (instructions, input)
}

fn extract_responses_text(response: ResponsesResponse) -> Option<String> {
    if let Some(text) = first_nonempty(response.output_text.as_deref()) {
        return Some(text);
    }

    for item in &response.output {
        for content in &item.content {
            if content.kind.as_deref() == Some("output_text") {
                if let Some(text) = first_nonempty(content.text.as_deref()) {
                    return Some(text);
                }
            }
        }
    }

    for item in &response.output {
        for content in &item.content {
            if let Some(text) = first_nonempty(content.text.as_deref()) {
                return Some(text);
            }
        }
    }

    None
}
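
// Test-only sketch of the extraction order above: when `output_text` is absent,
// the first non-empty `output_text` content part is used, trimmed.
#[cfg(test)]
mod responses_text_tests {
    use super::*;

    #[test]
    fn prefers_output_text_content_parts() {
        let response = ResponsesResponse {
            output: vec![ResponsesOutput {
                content: vec![ResponsesContent {
                    kind: Some("output_text".to_string()),
                    text: Some("  hello  ".to_string()),
                }],
            }],
            output_text: None,
        };
        assert_eq!(extract_responses_text(response).as_deref(), Some("hello"));
    }
}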

fn compact_sanitized_body_snippet(body: &str) -> String {
    super::sanitize_api_error(body)
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}

fn parse_chat_response_body(provider_name: &str, body: &str) -> anyhow::Result<ApiChatResponse> {
    serde_json::from_str::<ApiChatResponse>(body).map_err(|error| {
        let snippet = compact_sanitized_body_snippet(body);
        anyhow::anyhow!(
            "{provider_name} API returned an unexpected chat-completions payload: {error}; body={snippet}"
        )
    })
}

fn parse_responses_response_body(
    provider_name: &str,
    body: &str,
) -> anyhow::Result<ResponsesResponse> {
    serde_json::from_str::<ResponsesResponse>(body).map_err(|error| {
        let snippet = compact_sanitized_body_snippet(body);
        anyhow::anyhow!(
            "{provider_name} Responses API returned an unexpected payload: {error}; body={snippet}"
        )
    })
}

impl OpenAiCompatibleProvider {
    fn apply_auth_header(
        &self,
        req: reqwest::RequestBuilder,
        credential: &str,
    ) -> reqwest::RequestBuilder {
        match &self.auth_header {
            AuthStyle::Bearer => req.header("Authorization", format!("Bearer {credential}")),
            AuthStyle::XApiKey => req.header("x-api-key", credential),
            AuthStyle::Custom(header) => req.header(header, credential),
        }
    }

    async fn chat_via_responses(
        &self,
        credential: &str,
        messages: &[ChatMessage],
        model: &str,
    ) -> anyhow::Result<String> {
        let (instructions, input) = build_responses_prompt(messages);
        if input.is_empty() {
            anyhow::bail!(
                "{} Responses API fallback requires at least one non-system message",
                self.name
            );
        }

        let request = ResponsesRequest {
            model: model.to_string(),
            input,
            instructions,
            stream: Some(false),
        };

        let url = self.responses_url();

        let response = self
            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
            .send()
            .await?;

        if !response.status().is_success() {
            let error = response.text().await?;
            anyhow::bail!("{} Responses API error: {error}", self.name);
        }

        let body = response.text().await?;
        let responses = parse_responses_response_body(&self.name, &body)?;

        extract_responses_text(responses)
            .ok_or_else(|| anyhow::anyhow!("No response from {} Responses API", self.name))
    }

    fn convert_tool_specs(
        tools: Option<&[crate::tools::ToolSpec]>,
    ) -> Option<Vec<serde_json::Value>> {
        tools.map(|items| {
            items
                .iter()
                .map(|tool| {
                    serde_json::json!({
                        "type": "function",
                        "function": {
                            "name": tool.name,
                            "description": tool.description,
                            "parameters": tool.parameters,
                        }
                    })
                })
                .collect()
        })
    }

    fn to_message_content(
        role: &str,
        content: &str,
        allow_user_image_parts: bool,
    ) -> MessageContent {
        if role != "user" || !allow_user_image_parts {
            return MessageContent::Text(content.to_string());
        }

        let (cleaned_text, image_refs) = multimodal::parse_image_markers(content);
        if image_refs.is_empty() {
            return MessageContent::Text(content.to_string());
        }

        let mut parts = Vec::with_capacity(image_refs.len() + 1);
        let trimmed_text = cleaned_text.trim();
        if !trimmed_text.is_empty() {
            parts.push(MessagePart::Text {
                text: trimmed_text.to_string(),
            });
        }

        for image_ref in image_refs {
            parts.push(MessagePart::ImageUrl {
                image_url: ImageUrlPart { url: image_ref },
            });
        }

        MessageContent::Parts(parts)
    }

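    /// Convert trait-level `ChatMessage`s into native chat-completions messages,
    /// decoding JSON-encoded assistant tool-call and tool-result payloads when present.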
1427    fn convert_messages_for_native(
1428        messages: &[ChatMessage],
1429        allow_user_image_parts: bool,
1430    ) -> Vec<NativeMessage> {
1431        messages
1432            .iter()
1433            .map(|message| {
1434                if message.role == "assistant" {
1435                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
1436                    {
1437                        if let Some(tool_calls_value) = value.get("tool_calls") {
1438                            if let Ok(parsed_calls) =
1439                                serde_json::from_value::<Vec<ProviderToolCall>>(
1440                                    tool_calls_value.clone(),
1441                                )
1442                            {
1443                                let tool_calls = parsed_calls
1444                                    .into_iter()
1445                                    .map(|tc| ToolCall {
1446                                        id: Some(tc.id),
1447                                        kind: Some("function".to_string()),
1448                                        function: Some(Function {
1449                                            name: Some(tc.name),
1450                                            arguments: Some(tc.arguments),
1451                                        }),
1452                                        name: None,
1453                                        arguments: None,
1454                                        parameters: None,
1455                                    })
1456                                    .collect::<Vec<_>>();
1457
1458                                let content = value
1459                                    .get("content")
1460                                    .and_then(serde_json::Value::as_str)
1461                                    .map(|value| MessageContent::Text(value.to_string()));
1462
1463                                let reasoning_content = value
1464                                    .get("reasoning_content")
1465                                    .and_then(serde_json::Value::as_str)
1466                                    .map(ToString::to_string);
1467
1468                                return NativeMessage {
1469                                    role: "assistant".to_string(),
1470                                    content,
1471                                    tool_call_id: None,
1472                                    tool_calls: Some(tool_calls),
1473                                    reasoning_content,
1474                                };
1475                            }
1476                        }
1477                    }
1478                }
1479
1480                if message.role == "tool" {
1481                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content) {
1482                        let tool_call_id = value
1483                            .get("tool_call_id")
1484                            .and_then(serde_json::Value::as_str)
1485                            .map(ToString::to_string);
1486                        let content = value
1487                            .get("content")
1488                            .and_then(serde_json::Value::as_str)
1489                            .map(|value| MessageContent::Text(value.to_string()))
1490                            .or_else(|| Some(MessageContent::Text(message.content.clone())));
1491
1492                        return NativeMessage {
1493                            role: "tool".to_string(),
1494                            content,
1495                            tool_call_id,
1496                            tool_calls: None,
1497                            reasoning_content: None,
1498                        };
1499                    }
1500                }
1501
1502                NativeMessage {
1503                    role: message.role.clone(),
1504                    content: Some(Self::to_message_content(
1505                        &message.role,
1506                        &message.content,
1507                        allow_user_image_parts,
1508                    )),
1509                    tool_call_id: None,
1510                    tool_calls: None,
1511                    reasoning_content: None,
1512                }
1513            })
1514            .collect()
1515    }
1516
1517    fn with_prompt_guided_tool_instructions(
1518        messages: &[ChatMessage],
1519        tools: Option<&[crate::tools::ToolSpec]>,
1520    ) -> Vec<ChatMessage> {
1521        let Some(tools) = tools else {
1522            return messages.to_vec();
1523        };
1524
1525        if tools.is_empty() {
1526            return messages.to_vec();
1527        }
1528
1529        let instructions = crate::providers::traits::build_tool_instructions_text(tools);
1530        let mut modified_messages = messages.to_vec();
1531
1532        if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") {
1533            if !system_message.content.is_empty() {
1534                system_message.content.push_str("\n\n");
1535            }
1536            system_message.content.push_str(&instructions);
1537        } else {
1538            modified_messages.insert(0, ChatMessage::system(instructions));
1539        }
1540
1541        modified_messages
1542    }
1543
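    /// Maps a native chat-completions message into a `ProviderChatResponse`,
    /// keeping provider-issued tool-call ids and replacing tool-call arguments
    /// that are not valid JSON with an empty object.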
1544    fn parse_native_response(message: ResponseMessage) -> ProviderChatResponse {
1545        let text = message.effective_content_optional();
1546        let reasoning_content = message.reasoning_content.clone();
1547        let tool_calls = message
1548            .tool_calls
1549            .unwrap_or_default()
1550            .into_iter()
1551            .filter_map(|tc| {
1552                let name = tc.function_name()?;
1553                let arguments = tc.function_arguments().unwrap_or_else(|| "{}".to_string());
1554                let normalized_arguments =
1555                    if serde_json::from_str::<serde_json::Value>(&arguments).is_ok() {
1556                        arguments
1557                    } else {
1558                        tracing::warn!(
1559                            function = %name,
1560                            arguments = %arguments,
1561                            "Invalid JSON in native tool-call arguments, using empty object"
1562                        );
1563                        "{}".to_string()
1564                    };
1565                Some(ProviderToolCall {
1566                    id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
1567                    name,
1568                    arguments: normalized_arguments,
1569                })
1570            })
1571            .collect::<Vec<_>>();
1572
1573        ProviderChatResponse {
1574            text,
1575            tool_calls,
1576            usage: None,
1577            reasoning_content,
1578        }
1579    }
1580
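    /// Heuristic check: a 400/422 whose body mentions tools or function calling
    /// is treated as "this backend rejects the native tools schema", so callers
    /// can retry with prompt-guided tool instructions instead.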
1581    fn is_native_tool_schema_unsupported(status: reqwest::StatusCode, error: &str) -> bool {
1582        if !matches!(
1583            status,
1584            reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY
1585        ) {
1586            return false;
1587        }
1588
1589        let lower = error.to_lowercase();
1590        [
1591            "unknown parameter: tools",
1592            "unsupported parameter: tools",
1593            "unrecognized field `tools`",
1594            "does not support tools",
1595            "function calling is not supported",
1596            "tool_choice",
1597            "tool call validation failed",
1598            "was not in request",
1599        ]
1600        .iter()
1601        .any(|hint| lower.contains(hint))
1602    }
1603}
1604
1605#[async_trait]
1606impl Provider for OpenAiCompatibleProvider {
1607    fn capabilities(&self) -> crate::providers::traits::ProviderCapabilities {
1608        crate::providers::traits::ProviderCapabilities {
1609            native_tool_calling: self.native_tool_calling,
1610            vision: self.supports_vision,
1611            prompt_caching: false,
1612        }
1613    }
1614
1615    async fn chat_with_system(
1616        &self,
1617        system_prompt: Option<&str>,
1618        message: &str,
1619        model: &str,
1620        temperature: f64,
1621    ) -> anyhow::Result<String> {
1622        let credential = self.credential.as_ref().ok_or_else(|| {
1623            anyhow::anyhow!(
1624                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1625                self.name
1626            )
1627        })?;
1628
1629        let mut messages = Vec::new();
1630
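        // When merge_system_into_user is set, fold the system prompt into the
        // user turn; otherwise send it as a separate system message.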
1631        if self.merge_system_into_user {
1632            let content = match system_prompt {
1633                Some(sys) => format!("{sys}\n\n{message}"),
1634                None => message.to_string(),
1635            };
1636            messages.push(Message {
1637                role: "user".to_string(),
1638                content: Self::to_message_content("user", &content, !self.merge_system_into_user),
1639            });
1640        } else {
1641            if let Some(sys) = system_prompt {
1642                messages.push(Message {
1643                    role: "system".to_string(),
1644                    content: MessageContent::Text(sys.to_string()),
1645                });
1646            }
1647            messages.push(Message {
1648                role: "user".to_string(),
1649                content: Self::to_message_content("user", message, true),
1650            });
1651        }
1652
1653        let request = ApiChatRequest {
1654            model: model.to_string(),
1655            messages,
1656            temperature,
1657            stream: Some(false),
1658            stream_options: None,
1659            reasoning_effort: self.reasoning_effort_for_model(model),
1660            tool_stream: None,
1661            tools: None,
1662            tool_choice: None,
1663            max_tokens: self.max_tokens,
1664        };
1665
1666        let url = self.chat_completions_url();
1667
1668        let mut fallback_messages = Vec::new();
1669        if let Some(system_prompt) = system_prompt {
1670            fallback_messages.push(ChatMessage::system(system_prompt));
1671        }
1672        fallback_messages.push(ChatMessage::user(message));
1673        let fallback_messages = if self.merge_system_into_user {
1674            Self::flatten_system_messages(&fallback_messages)
1675        } else {
1676            fallback_messages
1677        };
1678
1679        let response = match self
1680            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1681            .send()
1682            .await
1683        {
1684            Ok(response) => response,
1685            Err(chat_error) => {
1686                if self.supports_responses_fallback {
1687                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
1688                    return self
1689                        .chat_via_responses(credential, &fallback_messages, model)
1690                        .await
1691                        .map_err(|responses_err| {
1692                            anyhow::anyhow!(
1693                                "{} chat completions transport error: {sanitized} (responses fallback failed: {responses_err})",
1694                                self.name
1695                            )
1696                        });
1697                }
1698
1699                return Err(chat_error.into());
1700            }
1701        };
1702
1703        if !response.status().is_success() {
1704            let status = response.status();
1705            let error = response.text().await?;
1706            let sanitized = super::sanitize_api_error(&error);
1707
1708            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
1709                return self
1710                    .chat_via_responses(credential, &fallback_messages, model)
1711                    .await
1712                    .map_err(|responses_err| {
1713                        anyhow::anyhow!(
1714                            "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {responses_err})",
1715                            self.name
1716                        )
1717                    });
1718            }
1719
1720            anyhow::bail!("{} API error ({status}): {sanitized}", self.name);
1721        }
1722
1723        let body = response.text().await?;
1724        let chat_response = parse_chat_response_body(&self.name, &body)?;
1725
1726        chat_response
1727            .choices
1728            .into_iter()
1729            .next()
1730            .map(|c| {
1731                // If tool_calls are present, serialize the full message as JSON
1732                // so parse_tool_calls can handle the OpenAI-style format
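                    // Illustrative OpenAI-style shape of the serialized message
                    // (exact field layout may vary by provider):
                    //   {"content":null,"tool_calls":[{"id":"call_1","type":"function",
                    //     "function":{"name":"shell","arguments":"{\"command\":\"pwd\"}"}}]}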
                if c.message
                    .tool_calls
                    .as_ref()
                    .is_some_and(|t| !t.is_empty())
1738                {
1739                    serde_json::to_string(&c.message)
1740                        .unwrap_or_else(|_| c.message.effective_content())
1741                } else {
1742                    // No tool calls, return content (with reasoning_content fallback)
1743                    c.message.effective_content()
1744                }
1745            })
1746            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
1747    }
1748
1749    async fn chat_with_history(
1750        &self,
1751        messages: &[ChatMessage],
1752        model: &str,
1753        temperature: f64,
1754    ) -> anyhow::Result<String> {
1755        let credential = self.credential.as_ref().ok_or_else(|| {
1756            anyhow::anyhow!(
1757                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1758                self.name
1759            )
1760        })?;
1761
1762        let effective_messages = if self.merge_system_into_user {
1763            Self::flatten_system_messages(messages)
1764        } else {
1765            messages.to_vec()
1766        };
1767        let api_messages: Vec<Message> = effective_messages
1768            .iter()
1769            .map(|m| Message {
1770                role: m.role.clone(),
1771                content: Self::to_message_content(
1772                    &m.role,
1773                    &m.content,
1774                    !self.merge_system_into_user,
1775                ),
1776            })
1777            .collect();
1778
1779        let request = ApiChatRequest {
1780            model: model.to_string(),
1781            messages: api_messages,
1782            temperature,
1783            stream: Some(false),
1784            stream_options: None,
1785            reasoning_effort: self.reasoning_effort_for_model(model),
1786            tool_stream: None,
1787            tools: None,
1788            tool_choice: None,
1789            max_tokens: self.max_tokens,
1790        };
1791
1792        let url = self.chat_completions_url();
1793        let response = match self
1794            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1795            .send()
1796            .await
1797        {
1798            Ok(response) => response,
1799            Err(chat_error) => {
1800                if self.supports_responses_fallback {
1801                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
1802                    return self
1803                        .chat_via_responses(credential, &effective_messages, model)
1804                        .await
1805                        .map_err(|responses_err| {
1806                            anyhow::anyhow!(
1807                                "{} chat completions transport error: {sanitized} (responses fallback failed: {responses_err})",
1808                                self.name
1809                            )
1810                        });
1811                }
1812
1813                return Err(chat_error.into());
1814            }
1815        };
1816
1817        if !response.status().is_success() {
1818            let status = response.status();
1819
1820            // Mirror chat_with_system: 404 may mean this provider uses the Responses API
1821            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
1822                return self
1823                    .chat_via_responses(credential, &effective_messages, model)
1824                    .await
1825                    .map_err(|responses_err| {
1826                        anyhow::anyhow!(
                            "{} API error ({status}): chat completions unavailable; responses fallback failed: {responses_err}",
1828                            self.name
1829                        )
1830                    });
1831            }
1832
1833            return Err(super::api_error(&self.name, response).await);
1834        }
1835
1836        let body = response.text().await?;
1837        let chat_response = parse_chat_response_body(&self.name, &body)?;
1838
1839        chat_response
1840            .choices
1841            .into_iter()
1842            .next()
1843            .map(|c| {
1844                // If tool_calls are present, serialize the full message as JSON
1845                // so parse_tool_calls can handle the OpenAI-style format
                if c.message
                    .tool_calls
                    .as_ref()
                    .is_some_and(|t| !t.is_empty())
1851                {
1852                    serde_json::to_string(&c.message)
1853                        .unwrap_or_else(|_| c.message.effective_content())
1854                } else {
1855                    // No tool calls, return content (with reasoning_content fallback)
1856                    c.message.effective_content()
1857                }
1858            })
1859            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))
1860    }
1861
1862    async fn chat_with_tools(
1863        &self,
1864        messages: &[ChatMessage],
1865        tools: &[serde_json::Value],
1866        model: &str,
1867        temperature: f64,
1868    ) -> anyhow::Result<ProviderChatResponse> {
1869        let credential = self.credential.as_ref().ok_or_else(|| {
1870            anyhow::anyhow!(
1871                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1872                self.name
1873            )
1874        })?;
1875
1876        let effective_messages = if self.merge_system_into_user {
1877            Self::flatten_system_messages(messages)
1878        } else {
1879            messages.to_vec()
1880        };
1881        let api_messages: Vec<Message> = effective_messages
1882            .iter()
1883            .map(|m| Message {
1884                role: m.role.clone(),
1885                content: Self::to_message_content(
1886                    &m.role,
1887                    &m.content,
1888                    !self.merge_system_into_user,
1889                ),
1890            })
1891            .collect();
1892
1893        let request = ApiChatRequest {
1894            model: model.to_string(),
1895            messages: api_messages,
1896            temperature,
1897            stream: Some(false),
1898            stream_options: None,
1899            reasoning_effort: self.reasoning_effort_for_model(model),
1900            tool_stream: self.tool_stream_for_tools(!tools.is_empty()),
1901            tools: if tools.is_empty() {
1902                None
1903            } else {
1904                Some(tools.to_vec())
1905            },
1906            tool_choice: if tools.is_empty() {
1907                None
1908            } else {
1909                Some("auto".to_string())
1910            },
1911            max_tokens: self.max_tokens,
1912        };
1913
1914        let url = self.chat_completions_url();
1915        let response = match self
1916            .apply_auth_header(self.http_client().post(&url).json(&request), credential)
1917            .send()
1918            .await
1919        {
1920            Ok(response) => response,
1921            Err(error) => {
1922                tracing::warn!(
1923                    "{} native tool call transport failed: {error}; falling back to history path",
1924                    self.name
1925                );
1926                let text = self.chat_with_history(messages, model, temperature).await?;
1927                return Ok(ProviderChatResponse {
1928                    text: Some(text),
1929                    tool_calls: vec![],
1930                    usage: None,
1931                    reasoning_content: None,
1932                });
1933            }
1934        };
1935
1936        if !response.status().is_success() {
1937            return Err(super::api_error(&self.name, response).await);
1938        }
1939
1940        let body = response.text().await?;
1941        let chat_response = parse_chat_response_body(&self.name, &body)?;
1942        let usage = chat_response.usage.map(|u| TokenUsage {
1943            input_tokens: u.prompt_tokens,
1944            output_tokens: u.completion_tokens,
1945            cached_input_tokens: None,
1946        });
1947        let choice = chat_response
1948            .choices
1949            .into_iter()
1950            .next()
1951            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?;
1952
1953        let text = choice.message.effective_content_optional();
1954        let reasoning_content = choice.message.reasoning_content;
1955        let tool_calls = choice
1956            .message
1957            .tool_calls
1958            .unwrap_or_default()
1959            .into_iter()
1960            .filter_map(|tc| {
1961                let function = tc.function?;
1962                let name = function.name?;
1963                let arguments = function.arguments.unwrap_or_else(|| "{}".to_string());
1964                Some(ProviderToolCall {
                    // Preserve the provider-issued tool-call id; fall back to a fresh UUID.
                    id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
1966                    name,
1967                    arguments,
1968                })
1969            })
1970            .collect::<Vec<_>>();
1971
1972        Ok(ProviderChatResponse {
1973            text,
1974            tool_calls,
1975            usage,
1976            reasoning_content,
1977        })
1978    }
1979
1980    async fn chat(
1981        &self,
1982        request: ProviderChatRequest<'_>,
1983        model: &str,
1984        temperature: f64,
1985    ) -> anyhow::Result<ProviderChatResponse> {
1986        let credential = self.credential.as_ref().ok_or_else(|| {
1987            anyhow::anyhow!(
1988                "{} API key not set. Run `construct onboard` or set the appropriate env var.",
1989                self.name
1990            )
1991        })?;
1992
1993        let tools = Self::convert_tool_specs(request.tools);
1994        let effective_messages = if self.merge_system_into_user {
1995            Self::flatten_system_messages(request.messages)
1996        } else {
1997            request.messages.to_vec()
1998        };
1999        let native_request = NativeChatRequest {
2000            model: model.to_string(),
2001            messages: Self::convert_messages_for_native(
2002                &effective_messages,
2003                !self.merge_system_into_user,
2004            ),
2005            temperature,
2006            stream: Some(false),
2007            stream_options: None,
2008            reasoning_effort: self.reasoning_effort_for_model(model),
2009            tool_stream: self
2010                .tool_stream_for_tools(tools.as_ref().is_some_and(|tools| !tools.is_empty())),
2011            tool_choice: tools.as_ref().map(|_| "auto".to_string()),
2012            tools,
2013            max_tokens: self.max_tokens,
2014        };
2015
2016        let url = self.chat_completions_url();
2017        let response = match self
2018            .apply_auth_header(
2019                self.http_client().post(&url).json(&native_request),
2020                credential,
2021            )
2022            .send()
2023            .await
2024        {
2025            Ok(response) => response,
2026            Err(chat_error) => {
2027                if self.supports_responses_fallback {
2028                    let sanitized = super::sanitize_api_error(&chat_error.to_string());
2029                    return self
2030                        .chat_via_responses(credential, &effective_messages, model)
2031                        .await
2032                        .map(|text| ProviderChatResponse {
2033                            text: Some(text),
2034                            tool_calls: vec![],
2035                            usage: None,
2036                            reasoning_content: None,
2037                        })
2038                        .map_err(|responses_err| {
2039                            anyhow::anyhow!(
2040                                "{} native chat transport error: {sanitized} (responses fallback failed: {responses_err})",
2041                                self.name
2042                            )
2043                        });
2044                }
2045
2046                return Err(chat_error.into());
2047            }
2048        };
2049
2050        if !response.status().is_success() {
2051            let status = response.status();
2052            let error = response.text().await?;
2053            let sanitized = super::sanitize_api_error(&error);
2054
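            // The backend rejected the native `tools` schema; retry the same turn
            // with the tool definitions injected into the prompt as text.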
2055            if Self::is_native_tool_schema_unsupported(status, &sanitized) {
2056                let fallback_messages =
2057                    Self::with_prompt_guided_tool_instructions(request.messages, request.tools);
2058                let text = self
2059                    .chat_with_history(&fallback_messages, model, temperature)
2060                    .await?;
2061                return Ok(ProviderChatResponse {
2062                    text: Some(text),
2063                    tool_calls: vec![],
2064                    usage: None,
2065                    reasoning_content: None,
2066                });
2067            }
2068
2069            if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback {
2070                return self
2071                    .chat_via_responses(credential, &effective_messages, model)
2072                    .await
2073                    .map(|text| ProviderChatResponse {
2074                        text: Some(text),
2075                        tool_calls: vec![],
2076                        usage: None,
2077                        reasoning_content: None,
2078                    })
2079                    .map_err(|responses_err| {
2080                        anyhow::anyhow!(
2081                            "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {responses_err})",
2082                            self.name
2083                        )
2084                    });
2085            }
2086
2087            anyhow::bail!("{} API error ({status}): {sanitized}", self.name);
2088        }
2089
2090        let native_response: ApiChatResponse = response.json().await?;
2091        let usage = native_response.usage.map(|u| TokenUsage {
2092            input_tokens: u.prompt_tokens,
2093            output_tokens: u.completion_tokens,
2094            cached_input_tokens: None,
2095        });
2096        let message = native_response
2097            .choices
2098            .into_iter()
2099            .next()
2100            .map(|choice| choice.message)
2101            .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?;
2102
2103        let mut result = Self::parse_native_response(message);
2104        result.usage = usage;
2105        Ok(result)
2106    }
2107
2108    fn supports_native_tools(&self) -> bool {
2109        self.native_tool_calling
2110    }
2111
2112    fn supports_streaming(&self) -> bool {
2113        true
2114    }
2115
2116    fn supports_streaming_tool_events(&self) -> bool {
2117        self.native_tool_calling
2118    }
2119
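    /// Streams chat events over SSE by bridging the HTTP response into a channel.
    /// A minimal consumption sketch (illustrative only: `provider`, `request`,
    /// `options`, and `handle_event` are placeholders supplied by the caller):
    ///
    /// ```ignore
    /// use futures_util::StreamExt;
    ///
    /// let mut events = provider.stream_chat(request, "some-model", 0.7, options);
    /// while let Some(event) = events.next().await {
    ///     match event? {
    ///         StreamEvent::Final => break,
    ///         other => handle_event(other),
    ///     }
    /// }
    /// ```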
2120    fn stream_chat(
2121        &self,
2122        request: ProviderChatRequest<'_>,
2123        model: &str,
2124        temperature: f64,
2125        options: StreamOptions,
2126    ) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
2127        if !options.enabled {
2128            return stream::once(async { Ok(StreamEvent::Final) }).boxed();
2129        }
2130
2131        let credential = match self.credential.as_ref() {
2132            Some(value) => value.clone(),
2133            None => {
2134                let provider_name = self.name.clone();
2135                return stream::once(async move {
2136                    Err(StreamError::Provider(format!(
2137                        "{} API key not set",
2138                        provider_name
2139                    )))
2140                })
2141                .boxed();
2142            }
2143        };
2144
2145        let has_tools = request.tools.is_some_and(|tools| !tools.is_empty());
2146        let effective_messages = if self.merge_system_into_user {
2147            Self::flatten_system_messages(request.messages)
2148        } else {
2149            request.messages.to_vec()
2150        };
2151
2152        let tools = Self::convert_tool_specs(request.tools);
2153        let payload = if has_tools {
2154            serde_json::to_value(NativeChatRequest {
2155                model: model.to_string(),
2156                messages: Self::convert_messages_for_native(
2157                    &effective_messages,
2158                    !self.merge_system_into_user,
2159                ),
2160                temperature,
                reasoning_effort: self.reasoning_effort_for_model(model),
2162                // `tool_stream` is a z.ai-specific extension; OpenAI-compatible
2163                // endpoints like NVIDIA NIM reject it as an unknown parameter
2164                // (400 "Unsupported parameter(s): tool_stream"). Gate on the
2165                // same `requires_tool_stream` check used elsewhere so we only
2166                // send it to providers that actually accept it.
2167                tool_stream: if options.enabled {
2168                    self.tool_stream_for_tools(true)
2169                } else {
2170                    None
2171                },
2172                stream: Some(options.enabled),
2173                stream_options: if options.enabled {
2174                    Some(StreamOptionsPayload {
2175                        include_usage: true,
2176                    })
2177                } else {
2178                    None
2179                },
2180                tools: tools.clone(),
2181                tool_choice: tools.as_ref().map(|_| "auto".to_string()),
2182                max_tokens: self.max_tokens,
2183            })
2184        } else {
2185            let messages = effective_messages
2186                .iter()
2187                .map(|message| Message {
2188                    role: message.role.clone(),
2189                    content: Self::to_message_content(
2190                        &message.role,
2191                        &message.content,
2192                        !self.merge_system_into_user,
2193                    ),
2194                })
2195                .collect();
2196
2197            serde_json::to_value(ApiChatRequest {
2198                model: model.to_string(),
2199                messages,
2200                temperature,
                reasoning_effort: self.reasoning_effort_for_model(model),
2202                // No tools in this branch, so `tool_stream` is meaningless
2203                // regardless of provider. Always None.
2204                tool_stream: None,
2205                stream: Some(options.enabled),
2206                stream_options: if options.enabled {
2207                    Some(StreamOptionsPayload {
2208                        include_usage: true,
2209                    })
2210                } else {
2211                    None
2212                },
2213                tools: None,
2214                tool_choice: None,
2215                max_tokens: self.max_tokens,
2216            })
2217        };
2218
2219        let payload = match payload {
2220            Ok(payload) => payload,
2221            Err(error) => {
2222                return stream::once(async move { Err(StreamError::Json(error)) }).boxed();
2223            }
2224        };
2225
2226        let url = self.chat_completions_url();
2227        let client = self.http_client();
2228        let auth_header = self.auth_header.clone();
2229        let count_tokens = options.count_tokens;
2230
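        // Bridge the SSE response into the returned stream through a bounded
        // channel; the spawned task owns the HTTP request and forwards events.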
2231        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(100);
2232
2233        tokio::spawn(async move {
2234            let mut req_builder = client.post(&url).json(&payload);
2235
2236            req_builder = match &auth_header {
2237                AuthStyle::Bearer => {
2238                    req_builder.header("Authorization", format!("Bearer {}", credential))
2239                }
2240                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2241                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2242            };
2243            req_builder = req_builder.header("Accept", "text/event-stream");
2244
2245            let response = match req_builder.send().await {
2246                Ok(r) => r,
2247                Err(e) => {
2248                    let _ = tx.send(Err(StreamError::Http(e))).await;
2249                    return;
2250                }
2251            };
2252
2253            if !response.status().is_success() {
2254                let status = response.status();
2255                let error = match response.text().await {
2256                    Ok(text) => text,
2257                    Err(_) => format!("HTTP error: {}", status),
2258                };
2259                let _ = tx
2260                    .send(Err(StreamError::Provider(format!("{}: {}", status, error))))
2261                    .await;
2262                return;
2263            }
2264
2265            let mut event_stream = sse_bytes_to_events(response, count_tokens);
2266            while let Some(event) = event_stream.next().await {
2267                if tx.send(event).await.is_err() {
2268                    break;
2269                }
2270            }
2271        });
2272
2273        stream::unfold(rx, |mut rx| async move {
2274            rx.recv().await.map(|event| (event, rx))
2275        })
2276        .boxed()
2277    }
2278
2279    fn stream_chat_with_system(
2280        &self,
2281        system_prompt: Option<&str>,
2282        message: &str,
2283        model: &str,
2284        temperature: f64,
2285        options: StreamOptions,
2286    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
2287        let credential = match self.credential.as_ref() {
2288            Some(value) => value.clone(),
2289            None => {
2290                let provider_name = self.name.clone();
2291                return stream::once(async move {
2292                    Err(StreamError::Provider(format!(
2293                        "{} API key not set",
2294                        provider_name
2295                    )))
2296                })
2297                .boxed();
2298            }
2299        };
2300
        let mut messages = Vec::new();
        if self.merge_system_into_user {
            let content = match system_prompt {
                Some(sys) => format!("{sys}\n\n{message}"),
                None => message.to_string(),
            };
            messages.push(Message {
                role: "user".to_string(),
                content: Self::to_message_content("user", &content, !self.merge_system_into_user),
            });
        } else {
            if let Some(sys) = system_prompt {
                messages.push(Message {
                    role: "system".to_string(),
                    content: MessageContent::Text(sys.to_string()),
                });
            }
            messages.push(Message {
                role: "user".to_string(),
                content: Self::to_message_content("user", message, true),
            });
        }
2312
2313        let request = ApiChatRequest {
2314            model: model.to_string(),
2315            messages,
2316            temperature,
2317            stream: Some(options.enabled),
2318            stream_options: None,
2319            reasoning_effort: self.reasoning_effort_for_model(model),
2320            tool_stream: None,
2321            tools: None,
2322            tool_choice: None,
2323            max_tokens: self.max_tokens,
2324        };
2325
2326        let url = self.chat_completions_url();
2327        let client = self.http_client();
2328        let auth_header = self.auth_header.clone();
2329
2330        // Use a channel to bridge the async HTTP response to the stream
2331        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
2332
2333        tokio::spawn(async move {
2334            // Build request with auth
2335            let mut req_builder = client.post(&url).json(&request);
2336
2337            // Apply auth header
2338            req_builder = match &auth_header {
2339                AuthStyle::Bearer => {
2340                    req_builder.header("Authorization", format!("Bearer {}", credential))
2341                }
2342                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2343                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2344            };
2345
2346            // Set accept header for streaming
2347            req_builder = req_builder.header("Accept", "text/event-stream");
2348
2349            // Send request
2350            let response = match req_builder.send().await {
2351                Ok(r) => r,
2352                Err(e) => {
2353                    let _ = tx.send(Err(StreamError::Http(e))).await;
2354                    return;
2355                }
2356            };
2357
2358            // Check status
2359            if !response.status().is_success() {
2360                let status = response.status();
2361                let error = match response.text().await {
2362                    Ok(e) => e,
2363                    Err(_) => format!("HTTP error: {}", status),
2364                };
2365                let _ = tx
2366                    .send(Err(StreamError::Provider(format!("{}: {}", status, error))))
2367                    .await;
2368                return;
2369            }
2370
2371            // Convert to chunk stream and forward to channel
2372            let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens);
2373            while let Some(chunk) = chunk_stream.next().await {
2374                if tx.send(chunk).await.is_err() {
2375                    break; // Receiver dropped
2376                }
2377            }
2378        });
2379
2380        // Convert channel receiver to stream
2381        stream::unfold(rx, |mut rx| async move {
2382            rx.recv().await.map(|chunk| (chunk, rx))
2383        })
2384        .boxed()
2385    }
2386
2387    fn stream_chat_with_history(
2388        &self,
2389        messages: &[ChatMessage],
2390        model: &str,
2391        temperature: f64,
2392        options: StreamOptions,
2393    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
2394        let credential = match self.credential.as_ref() {
2395            Some(value) => value.clone(),
2396            None => {
2397                let provider_name = self.name.clone();
2398                return stream::once(async move {
2399                    Err(StreamError::Provider(format!(
2400                        "{} API key not set",
2401                        provider_name
2402                    )))
2403                })
2404                .boxed();
2405            }
2406        };
2407
2408        let effective_messages = if self.merge_system_into_user {
2409            Self::flatten_system_messages(messages)
2410        } else {
2411            messages.to_vec()
2412        };
2413        let api_messages: Vec<Message> = effective_messages
2414            .iter()
2415            .map(|m| Message {
2416                role: m.role.clone(),
2417                content: Self::to_message_content(
2418                    &m.role,
2419                    &m.content,
2420                    !self.merge_system_into_user,
2421                ),
2422            })
2423            .collect();
2424
2425        let request = ApiChatRequest {
2426            model: model.to_string(),
2427            messages: api_messages,
2428            temperature,
2429            stream: Some(options.enabled),
2430            stream_options: None,
2431            reasoning_effort: self.reasoning_effort_for_model(model),
2432            tool_stream: None,
2433            tools: None,
2434            tool_choice: None,
2435            max_tokens: self.max_tokens,
2436        };
2437
2438        let url = self.chat_completions_url();
2439        let client = self.http_client();
2440        let auth_header = self.auth_header.clone();
2441
2442        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamChunk>>(100);
2443
2444        tokio::spawn(async move {
2445            let mut req_builder = client.post(&url).json(&request);
2446
2447            req_builder = match &auth_header {
2448                AuthStyle::Bearer => {
2449                    req_builder.header("Authorization", format!("Bearer {}", credential))
2450                }
2451                AuthStyle::XApiKey => req_builder.header("x-api-key", &credential),
2452                AuthStyle::Custom(header) => req_builder.header(header, &credential),
2453            };
2454
2455            req_builder = req_builder.header("Accept", "text/event-stream");
2456
2457            let response = match req_builder.send().await {
2458                Ok(r) => r,
2459                Err(e) => {
2460                    let _ = tx.send(Err(StreamError::Http(e))).await;
2461                    return;
2462                }
2463            };
2464
2465            if !response.status().is_success() {
2466                let status = response.status();
2467                let error = match response.text().await {
2468                    Ok(e) => e,
2469                    Err(_) => format!("HTTP error: {}", status),
2470                };
2471                let _ = tx
2472                    .send(Err(StreamError::Provider(format!("{}: {}", status, error))))
2473                    .await;
2474                return;
2475            }
2476
2477            let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens);
2478            while let Some(chunk) = chunk_stream.next().await {
2479                if tx.send(chunk).await.is_err() {
2480                    break;
2481                }
2482            }
2483        });
2484
2485        stream::unfold(rx, |mut rx| async move {
2486            rx.recv().await.map(|chunk| (chunk, rx))
2487        })
2488        .boxed()
2489    }
2490
2491    async fn warmup(&self) -> anyhow::Result<()> {
2492        if let Some(credential) = self.credential.as_ref() {
2493            // Hit the chat completions URL with a GET to establish the connection pool.
2494            // The server will likely return 405 Method Not Allowed, which is fine -
2495            // the goal is TLS handshake and HTTP/2 negotiation.
2496            let url = self.chat_completions_url();
2497            let _ = self
2498                .apply_auth_header(self.http_client().get(&url), credential)
2499                .send()
2500                .await?;
2501        }
2502        Ok(())
2503    }
2504}
2505
2506#[cfg(test)]
2507mod tests {
2508    use super::*;
2509
2510    fn make_provider(name: &str, url: &str, key: Option<&str>) -> OpenAiCompatibleProvider {
2511        OpenAiCompatibleProvider::new(name, url, key, AuthStyle::Bearer)
2512    }
2513
2514    #[test]
2515    fn creates_with_key() {
2516        let p = make_provider(
2517            "venice",
2518            "https://api.venice.ai",
2519            Some("venice-test-credential"),
2520        );
2521        assert_eq!(p.name, "venice");
2522        assert_eq!(p.base_url, "https://api.venice.ai");
2523        assert_eq!(p.credential.as_deref(), Some("venice-test-credential"));
2524    }
2525
2526    #[test]
2527    fn creates_without_key() {
2528        let p = make_provider("test", "https://example.com", None);
2529        assert!(p.credential.is_none());
2530    }
2531
2532    #[test]
2533    fn strips_trailing_slash() {
2534        let p = make_provider("test", "https://example.com/", None);
2535        assert_eq!(p.base_url, "https://example.com");
2536    }
2537
2538    #[tokio::test]
2539    async fn chat_fails_without_key() {
2540        let p = make_provider("Venice", "https://api.venice.ai", None);
2541        let result = p
2542            .chat_with_system(None, "hello", "llama-3.3-70b", 0.7)
2543            .await;
2544        assert!(result.is_err());
2545        assert!(
2546            result
2547                .unwrap_err()
2548                .to_string()
2549                .contains("Venice API key not set")
2550        );
2551    }
2552
2553    #[test]
2554    fn request_serializes_correctly() {
2555        let req = ApiChatRequest {
2556            model: "llama-3.3-70b".to_string(),
2557            messages: vec![
2558                Message {
2559                    role: "system".to_string(),
2560                    content: MessageContent::Text("You are Construct".to_string()),
2561                },
2562                Message {
2563                    role: "user".to_string(),
2564                    content: MessageContent::Text("hello".to_string()),
2565                },
2566            ],
2567            temperature: 0.4,
2568            stream: Some(false),
2569            stream_options: None,
2570            reasoning_effort: None,
2571            tool_stream: None,
2572            tools: None,
2573            tool_choice: None,
2574            max_tokens: None,
2575        };
2576        let json = serde_json::to_string(&req).unwrap();
2577        assert!(json.contains("llama-3.3-70b"));
2578        assert!(json.contains("system"));
2579        assert!(json.contains("user"));
2580        // tools/tool_choice should be omitted when None
2581        assert!(!json.contains("tools"));
2582        assert!(!json.contains("tool_choice"));
2583    }
2584
2585    #[test]
2586    fn response_deserializes() {
2587        let json = r#"{"choices":[{"message":{"content":"Hello from Venice!"}}]}"#;
2588        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
2589        assert_eq!(
2590            resp.choices[0].message.content,
2591            Some("Hello from Venice!".to_string())
2592        );
2593    }
2594
2595    #[test]
2596    fn response_empty_choices() {
2597        let json = r#"{"choices":[]}"#;
2598        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
2599        assert!(resp.choices.is_empty());
2600    }
2601
2602    #[test]
2603    fn parse_chat_response_body_reports_sanitized_snippet() {
2604        let body = r#"{"choices":"invalid","api_key":"sk-test-secret-value"}"#;
2605        let err = parse_chat_response_body("custom", body).expect_err("payload should fail");
2606        let msg = err.to_string();
2607
2608        assert!(msg.contains("custom API returned an unexpected chat-completions payload"));
2609        assert!(msg.contains("body="));
2610        assert!(msg.contains("[REDACTED]"));
2611        assert!(!msg.contains("sk-test-secret-value"));
2612    }
2613
2614    #[test]
2615    fn parse_responses_response_body_reports_sanitized_snippet() {
2616        let body = r#"{"output_text":123,"api_key":"sk-another-secret"}"#;
2617        let err = parse_responses_response_body("custom", body).expect_err("payload should fail");
2618        let msg = err.to_string();
2619
2620        assert!(msg.contains("custom Responses API returned an unexpected payload"));
2621        assert!(msg.contains("body="));
2622        assert!(msg.contains("[REDACTED]"));
2623        assert!(!msg.contains("sk-another-secret"));
2624    }
2625
2626    #[test]
2627    fn x_api_key_auth_style() {
2628        let p = OpenAiCompatibleProvider::new(
2629            "moonshot",
2630            "https://api.moonshot.cn",
2631            Some("ms-key"),
2632            AuthStyle::XApiKey,
2633        );
2634        assert!(matches!(p.auth_header, AuthStyle::XApiKey));
2635    }
2636
2637    #[test]
2638    fn custom_auth_style() {
2639        let p = OpenAiCompatibleProvider::new(
2640            "custom",
2641            "https://api.example.com",
2642            Some("key"),
2643            AuthStyle::Custom("X-Custom-Key".into()),
2644        );
2645        assert!(matches!(p.auth_header, AuthStyle::Custom(_)));
2646    }
2647
2648    #[tokio::test]
2649    async fn all_compatible_providers_fail_without_key() {
2650        let providers = vec![
2651            make_provider("Venice", "https://api.venice.ai", None),
2652            make_provider("Moonshot", "https://api.moonshot.cn", None),
2653            make_provider("GLM", "https://open.bigmodel.cn", None),
2654            make_provider("MiniMax", "https://api.minimaxi.com/v1", None),
2655            make_provider("Groq", "https://api.groq.com/openai", None),
2656            make_provider("Mistral", "https://api.mistral.ai", None),
2657            make_provider("xAI", "https://api.x.ai", None),
2658            make_provider("Astrai", "https://as-trai.com/v1", None),
2659        ];
2660
2661        for p in providers {
2662            let result = p.chat_with_system(None, "test", "model", 0.7).await;
2663            assert!(result.is_err(), "{} should fail without key", p.name);
2664            assert!(
2665                result.unwrap_err().to_string().contains("API key not set"),
2666                "{} error should mention key",
2667                p.name
2668            );
2669        }
2670    }
2671
2672    #[test]
2673    fn responses_extracts_top_level_output_text() {
2674        let json = r#"{"output_text":"Hello from top-level","output":[]}"#;
2675        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2676        assert_eq!(
2677            extract_responses_text(response).as_deref(),
2678            Some("Hello from top-level")
2679        );
2680    }
2681
2682    #[test]
2683    fn responses_extracts_nested_output_text() {
2684        let json =
2685            r#"{"output":[{"content":[{"type":"output_text","text":"Hello from nested"}]}]}"#;
2686        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2687        assert_eq!(
2688            extract_responses_text(response).as_deref(),
2689            Some("Hello from nested")
2690        );
2691    }
2692
2693    #[test]
2694    fn responses_extracts_any_text_as_fallback() {
2695        let json = r#"{"output":[{"content":[{"type":"message","text":"Fallback text"}]}]}"#;
2696        let response: ResponsesResponse = serde_json::from_str(json).unwrap();
2697        assert_eq!(
2698            extract_responses_text(response).as_deref(),
2699            Some("Fallback text")
2700        );
2701    }
2702
2703    #[test]
2704    fn build_responses_prompt_preserves_multi_turn_history() {
2705        let messages = vec![
2706            ChatMessage::system("policy"),
2707            ChatMessage::user("step 1"),
2708            ChatMessage::assistant("ack 1"),
2709            ChatMessage::tool("{\"result\":\"ok\"}"),
2710            ChatMessage::user("step 2"),
2711        ];
2712
2713        let (instructions, input) = build_responses_prompt(&messages);
2714
2715        assert_eq!(instructions.as_deref(), Some("policy"));
2716        assert_eq!(input.len(), 4);
2717
2718        let serialized: Vec<serde_json::Value> = input
2719            .iter()
2720            .map(|item| serde_json::to_value(item).expect("responses input item serializes"))
2721            .collect();
2722        assert_eq!(
2723            serialized[0],
2724            serde_json::json!({
2725                "role": "user",
2726                "content": "step 1"
2727            })
2728        );
2729        assert_eq!(
2730            serialized[1],
2731            serde_json::json!({
2732                "role": "assistant",
2733                "type": "message",
2734                "content": [{
2735                    "type": "output_text",
2736                    "text": "ack 1"
2737                }]
2738            })
2739        );
2740        assert_eq!(
2741            serialized[2],
2742            serde_json::json!({
2743                "role": "assistant",
2744                "type": "message",
2745                "content": [{
2746                    "type": "output_text",
2747                    "text": "{\"result\":\"ok\"}"
2748                }]
2749            })
2750        );
2751        assert_eq!(
2752            serialized[3],
2753            serde_json::json!({
2754                "role": "user",
2755                "content": "step 2"
2756            })
2757        );
2758    }
2759
2760    #[tokio::test]
2761    async fn chat_via_responses_requires_non_system_message() {
2762        let provider = make_provider("custom", "https://api.example.com", Some("test-key"));
2763        let err = provider
2764            .chat_via_responses("test-key", &[ChatMessage::system("policy")], "gpt-test")
2765            .await
2766            .expect_err("system-only fallback payload should fail");
2767
2768        assert!(
2769            err.to_string()
2770                .contains("requires at least one non-system message")
2771        );
2772    }
2773
2774    #[test]
2775    fn tool_call_function_name_falls_back_to_top_level_name() {
2776        let call: ToolCall = serde_json::from_value(serde_json::json!({
2777            "name": "memory_recall",
2778            "arguments": "{\"query\":\"latest roadmap\"}"
2779        }))
2780        .unwrap();
2781
2782        assert_eq!(call.function_name().as_deref(), Some("memory_recall"));
2783    }
2784
2785    #[test]
2786    fn tool_call_function_arguments_falls_back_to_parameters_object() {
2787        let call: ToolCall = serde_json::from_value(serde_json::json!({
2788            "name": "shell",
2789            "parameters": {"command": "pwd"}
2790        }))
2791        .unwrap();
2792
2793        assert_eq!(
2794            call.function_arguments().as_deref(),
2795            Some("{\"command\":\"pwd\"}")
2796        );
2797    }
2798
2799    #[test]
2800    fn tool_call_function_arguments_prefers_nested_function_field() {
2801        let call: ToolCall = serde_json::from_value(serde_json::json!({
2802            "name": "ignored_name",
2803            "arguments": "{\"query\":\"ignored\"}",
2804            "function": {
2805                "name": "memory_recall",
2806                "arguments": "{\"query\":\"preferred\"}"
2807            }
2808        }))
2809        .unwrap();
2810
2811        assert_eq!(call.function_name().as_deref(), Some("memory_recall"));
2812        assert_eq!(
2813            call.function_arguments().as_deref(),
2814            Some("{\"query\":\"preferred\"}")
2815        );
2816    }
2817
2818    // ----------------------------------------------------------
2819    // Custom endpoint path tests (Issue #114)
2820    // ----------------------------------------------------------
2821
2822    #[test]
2823    fn chat_completions_url_standard_openai() {
2824        // Standard OpenAI-compatible providers get /chat/completions appended
2825        let p = make_provider("openai", "https://api.openai.com/v1", None);
2826        assert_eq!(
2827            p.chat_completions_url(),
2828            "https://api.openai.com/v1/chat/completions"
2829        );
2830    }
2831
2832    #[test]
2833    fn chat_completions_url_trailing_slash() {
2834        // Trailing slash is stripped, then /chat/completions appended
2835        let p = make_provider("test", "https://api.example.com/v1/", None);
2836        assert_eq!(
2837            p.chat_completions_url(),
2838            "https://api.example.com/v1/chat/completions"
2839        );
2840    }
2841
2842    #[test]
2843    fn chat_completions_url_volcengine_ark() {
2844        // VolcEngine ARK uses custom path - should use as-is
2845        let p = make_provider(
2846            "volcengine",
2847            "https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions",
2848            None,
2849        );
2850        assert_eq!(
2851            p.chat_completions_url(),
2852            "https://ark.cn-beijing.volces.com/api/coding/v3/chat/completions"
2853        );
2854    }
2855
2856    #[test]
2857    fn chat_completions_url_custom_full_endpoint() {
2858        // Custom provider with full endpoint path
2859        let p = make_provider(
2860            "custom",
2861            "https://my-api.example.com/v2/llm/chat/completions",
2862            None,
2863        );
2864        assert_eq!(
2865            p.chat_completions_url(),
2866            "https://my-api.example.com/v2/llm/chat/completions"
2867        );
2868    }
2869
2870    #[test]
2871    fn chat_completions_url_requires_exact_suffix_match() {
2872        let p = make_provider(
2873            "custom",
2874            "https://my-api.example.com/v2/llm/chat/completions-proxy",
2875            None,
2876        );
2877        assert_eq!(
2878            p.chat_completions_url(),
2879            "https://my-api.example.com/v2/llm/chat/completions-proxy/chat/completions"
2880        );
2881    }
2882
2883    #[test]
2884    fn responses_url_standard() {
2885        // Standard providers get /v1/responses appended
2886        let p = make_provider("test", "https://api.example.com", None);
2887        assert_eq!(p.responses_url(), "https://api.example.com/v1/responses");
2888    }
2889
2890    #[test]
2891    fn responses_url_custom_full_endpoint() {
2892        // Custom provider with full responses endpoint
2893        let p = make_provider(
2894            "custom",
2895            "https://my-api.example.com/api/v2/responses",
2896            None,
2897        );
2898        assert_eq!(
2899            p.responses_url(),
2900            "https://my-api.example.com/api/v2/responses"
2901        );
2902    }
2903
2904    #[test]
2905    fn responses_url_requires_exact_suffix_match() {
2906        let p = make_provider(
2907            "custom",
2908            "https://my-api.example.com/api/v2/responses-proxy",
2909            None,
2910        );
2911        assert_eq!(
2912            p.responses_url(),
2913            "https://my-api.example.com/api/v2/responses-proxy/responses"
2914        );
2915    }
2916
2917    #[test]
2918    fn responses_url_derives_from_chat_endpoint() {
2919        let p = make_provider(
2920            "custom",
2921            "https://my-api.example.com/api/v2/chat/completions",
2922            None,
2923        );
2924        assert_eq!(
2925            p.responses_url(),
2926            "https://my-api.example.com/api/v2/responses"
2927        );
2928    }
2929
2930    #[test]
2931    fn responses_url_base_with_v1_no_duplicate() {
2932        let p = make_provider("test", "https://api.example.com/v1", None);
2933        assert_eq!(p.responses_url(), "https://api.example.com/v1/responses");
2934    }
2935
2936    #[test]
2937    fn responses_url_non_v1_api_path_uses_raw_suffix() {
2938        let p = make_provider("test", "https://api.example.com/api/coding/v3", None);
2939        assert_eq!(
2940            p.responses_url(),
2941            "https://api.example.com/api/coding/v3/responses"
2942        );
2943    }
2944
2945    #[test]
2946    fn chat_completions_url_without_v1() {
2947        // Provider configured without /v1 in base URL
2948        let p = make_provider("test", "https://api.example.com", None);
2949        assert_eq!(
2950            p.chat_completions_url(),
2951            "https://api.example.com/chat/completions"
2952        );
2953    }
2954
2955    #[test]
2956    fn chat_completions_url_base_with_v1() {
2957        // Provider configured with /v1 in base URL
2958        let p = make_provider("test", "https://api.example.com/v1", None);
2959        assert_eq!(
2960            p.chat_completions_url(),
2961            "https://api.example.com/v1/chat/completions"
2962        );
2963    }
2964
2965    // ----------------------------------------------------------
2966    // Provider-specific endpoint tests (Issue #167)
2967    // ----------------------------------------------------------
2968
2969    #[test]
2970    fn chat_completions_url_zai() {
2971        // Z.AI uses /api/paas/v4 base path
2972        let p = make_provider("zai", "https://api.z.ai/api/paas/v4", None);
2973        assert_eq!(
2974            p.chat_completions_url(),
2975            "https://api.z.ai/api/paas/v4/chat/completions"
2976        );
2977    }
2978
2979    #[test]
2980    fn chat_completions_url_minimax() {
2981        // MiniMax OpenAI-compatible endpoint requires /v1 base path.
2982        let p = make_provider("minimax", "https://api.minimaxi.com/v1", None);
2983        assert_eq!(
2984            p.chat_completions_url(),
2985            "https://api.minimaxi.com/v1/chat/completions"
2986        );
2987    }
2988
2989    #[test]
2990    fn chat_completions_url_glm() {
2991        // GLM (BigModel) uses /api/paas/v4 base path
2992        let p = make_provider("glm", "https://open.bigmodel.cn/api/paas/v4", None);
2993        assert_eq!(
2994            p.chat_completions_url(),
2995            "https://open.bigmodel.cn/api/paas/v4/chat/completions"
2996        );
2997    }
2998
2999    #[test]
3000    fn chat_completions_url_opencode() {
3001        // OpenCode Zen uses /zen/v1 base path
3002        let p = make_provider("opencode", "https://opencode.ai/zen/v1", None);
3003        assert_eq!(
3004            p.chat_completions_url(),
3005            "https://opencode.ai/zen/v1/chat/completions"
3006        );
3007    }
3008
3009    #[test]
3010    fn chat_completions_url_opencode_go() {
3011        // OpenCode Go uses /zen/go/v1 base path
3012        let p = make_provider("opencode-go", "https://opencode.ai/zen/go/v1", None);
3013        assert_eq!(
3014            p.chat_completions_url(),
3015            "https://opencode.ai/zen/go/v1/chat/completions"
3016        );
3017    }
3018
3019    #[test]
3020    fn parse_native_response_preserves_tool_call_id() {
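        // The provider-assigned tool call id must survive parsing so results can be matched back to the call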
3021        let message = ResponseMessage {
3022            content: None,
3023            tool_calls: Some(vec![ToolCall {
3024                id: Some("call_123".to_string()),
3025                kind: Some("function".to_string()),
3026                function: Some(Function {
3027                    name: Some("shell".to_string()),
3028                    arguments: Some(r#"{"command":"pwd"}"#.to_string()),
3029                }),
3030                name: None,
3031                arguments: None,
3032                parameters: None,
3033            }]),
3034            reasoning_content: None,
3035        };
3036
3037        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3038        assert_eq!(parsed.tool_calls.len(), 1);
3039        assert_eq!(parsed.tool_calls[0].id, "call_123");
3040        assert_eq!(parsed.tool_calls[0].name, "shell");
3041    }
3042
3043    #[test]
3044    fn convert_messages_for_native_maps_tool_result_payload() {
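        // Stored tool-result JSON becomes a native role: "tool" message carrying the original tool_call_id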
3045        let input = vec![ChatMessage::tool(
3046            r#"{"tool_call_id":"call_abc","content":"done"}"#,
3047        )];
3048
3049        let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input, true);
3050        assert_eq!(converted.len(), 1);
3051        assert_eq!(converted[0].role, "tool");
3052        assert_eq!(converted[0].tool_call_id.as_deref(), Some("call_abc"));
3053        assert!(matches!(
3054            converted[0].content.as_ref(),
3055            Some(MessageContent::Text(value)) if value == "done"
3056        ));
3057    }
3058
3059    #[test]
3060    fn convert_messages_for_native_keeps_user_image_markers_as_text_when_disabled() {
3061        let input = vec![ChatMessage::user(
3062            "System primer [IMAGE:data:image/png;base64,abcd] user turn",
3063        )];
3064
3065        let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input, false);
3066        assert_eq!(converted.len(), 1);
3067        assert_eq!(converted[0].role, "user");
3068        assert!(matches!(
3069            converted[0].content.as_ref(),
3070            Some(MessageContent::Text(value))
3071                if value == "System primer [IMAGE:data:image/png;base64,abcd] user turn"
3072        ));
3073    }
3074
3075    #[test]
3076    fn flatten_system_messages_merges_into_first_user() {
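        // System content is prepended to the first user message and no system roles remain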
3077        let input = vec![
3078            ChatMessage::system("core policy"),
3079            ChatMessage::assistant("ack"),
3080            ChatMessage::system("delivery rules"),
3081            ChatMessage::user("hello"),
3082            ChatMessage::assistant("post-user"),
3083        ];
3084
3085        let output = OpenAiCompatibleProvider::flatten_system_messages(&input);
3086        assert_eq!(output.len(), 3);
3087        assert_eq!(output[0].role, "assistant");
3088        assert_eq!(output[0].content, "ack");
3089        assert_eq!(output[1].role, "user");
3090        assert_eq!(output[1].content, "core policy\n\ndelivery rules\n\nhello");
3091        assert_eq!(output[2].role, "assistant");
3092        assert_eq!(output[2].content, "post-user");
3093        assert!(output.iter().all(|m| m.role != "system"));
3094    }
3095
3096    #[test]
3097    fn flatten_system_messages_inserts_user_when_missing() {
3098        let input = vec![
3099            ChatMessage::system("core policy"),
3100            ChatMessage::assistant("ack"),
3101        ];
3102
3103        let output = OpenAiCompatibleProvider::flatten_system_messages(&input);
3104        assert_eq!(output.len(), 2);
3105        assert_eq!(output[0].role, "user");
3106        assert_eq!(output[0].content, "core policy");
3107        assert_eq!(output[1].role, "assistant");
3108        assert_eq!(output[1].content, "ack");
3109    }
3110
3111    #[test]
3112    fn strip_think_tags_drops_unclosed_block_suffix() {
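        // An unterminated <think> block is stripped through the end of the string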
3113        let input = "visible<think>hidden";
3114        assert_eq!(strip_think_tags(input), "visible");
3115    }
3116
3117    #[test]
3118    fn native_tool_schema_unsupported_detection_is_precise() {
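        // Only a 400 with a tool-schema complaint triggers the fallback; the same text on a 401 does not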
3119        assert!(OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3120            reqwest::StatusCode::BAD_REQUEST,
3121            "unknown parameter: tools"
3122        ));
3123        assert!(
3124            !OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3125                reqwest::StatusCode::UNAUTHORIZED,
3126                "unknown parameter: tools"
3127            )
3128        );
3129    }
3130
3131    #[test]
3132    fn native_tool_schema_unsupported_detects_groq_tool_validation_error() {
3133        assert!(OpenAiCompatibleProvider::is_native_tool_schema_unsupported(
3134            reqwest::StatusCode::BAD_REQUEST,
3135            r#"Groq API error (400 Bad Request): {"error":{"message":"tool call validation failed: attempted to call tool 'memory_recall={\"limit\":5}' which was not in request"}}"#
3136        ));
3137    }
3138
3139    #[test]
3140    fn prompt_guided_tool_fallback_injects_system_instruction() {
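        // Without native tool calling, tool specs are rendered into a leading system prompt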
3141        let input = vec![ChatMessage::user("check status")];
3142        let tools = vec![crate::tools::ToolSpec {
3143            name: "shell_exec".to_string(),
3144            description: "Execute shell command".to_string(),
3145            parameters: serde_json::json!({
3146                "type": "object",
3147                "properties": {
3148                    "command": { "type": "string" }
3149                },
3150                "required": ["command"]
3151            }),
3152        }];
3153
3154        let output =
3155            OpenAiCompatibleProvider::with_prompt_guided_tool_instructions(&input, Some(&tools));
3156        assert!(!output.is_empty());
3157        assert_eq!(output[0].role, "system");
3158        assert!(output[0].content.contains("Available Tools"));
3159        assert!(output[0].content.contains("shell_exec"));
3160    }
3161
3162    #[test]
3163    fn reasoning_effort_only_applies_to_gpt5_and_codex_models() {
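        // The configured effort is forwarded only for GPT-5/Codex model ids; other models get None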
3164        let provider = make_provider("test", "https://example.com", None)
3165            .with_reasoning_effort(Some("high".to_string()));
3166
3167        assert_eq!(
3168            provider.reasoning_effort_for_model("gpt-5.3-codex"),
3169            Some("high".to_string())
3170        );
3171        assert_eq!(
3172            provider.reasoning_effort_for_model("openai/gpt-5"),
3173            Some("high".to_string())
3174        );
3175        assert_eq!(provider.reasoning_effort_for_model("llama-3.3-70b"), None);
3176    }
3177
3178    #[tokio::test]
3179    async fn warmup_without_key_is_noop() {
3180        let provider = make_provider("test", "https://example.com", None);
3181        let result = provider.warmup().await;
3182        assert!(result.is_ok());
3183    }
3184
3185    // ══════════════════════════════════════════════════════════
3186    // Native tool calling tests
3187    // ══════════════════════════════════════════════════════════
3188
3189    #[test]
3190    fn capabilities_reports_native_tool_calling() {
3191        let p = make_provider("test", "https://example.com", None);
3192        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3193        assert!(caps.native_tool_calling);
3194        assert!(!caps.vision);
3195    }
3196
3197    #[test]
3198    fn capabilities_reports_vision_for_qwen_compatible_provider() {
3199        let p = OpenAiCompatibleProvider::new_with_vision(
3200            "Qwen",
3201            "https://dashscope.aliyuncs.com/compatible-mode/v1",
3202            Some("k"),
3203            AuthStyle::Bearer,
3204            true,
3205        );
3206        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3207        assert!(caps.native_tool_calling);
3208        assert!(caps.vision);
3209    }
3210
3211    #[test]
3212    fn minimax_provider_disables_native_tool_calling() {
3213        let p = OpenAiCompatibleProvider::new_merge_system_into_user(
3214            "MiniMax",
3215            "https://api.minimax.chat/v1",
3216            Some("k"),
3217            AuthStyle::Bearer,
3218        );
3219        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3220        assert!(
3221            !caps.native_tool_calling,
3222            "MiniMax should use prompt-guided tool calling, not native"
3223        );
3224        assert!(!caps.vision);
3225    }
3226
3227    #[test]
3228    fn user_agent_constructor_keeps_native_tool_calling_enabled() {
3229        let p = OpenAiCompatibleProvider::new_with_user_agent(
3230            "TestProvider",
3231            "https://example.com",
3232            Some("k"),
3233            AuthStyle::Bearer,
3234            "construct-test/1.0",
3235        );
3236        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3237        assert!(caps.native_tool_calling);
3238        assert!(!caps.vision);
3239        assert_eq!(p.user_agent.as_deref(), Some("construct-test/1.0"));
3240    }
3241
3242    #[test]
3243    fn user_agent_and_vision_constructor_preserves_capability_flags() {
3244        let p = OpenAiCompatibleProvider::new_with_user_agent_and_vision(
3245            "VisionProvider",
3246            "https://example.com",
3247            Some("k"),
3248            AuthStyle::Bearer,
3249            "construct-test/vision",
3250            true,
3251        );
3252        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3253        assert!(caps.native_tool_calling);
3254        assert!(caps.vision);
3255        assert_eq!(p.user_agent.as_deref(), Some("construct-test/vision"));
3256    }
3257
3258    #[test]
3259    fn no_responses_fallback_constructor_keeps_native_tool_calling_enabled() {
3260        let p = OpenAiCompatibleProvider::new_no_responses_fallback(
3261            "FallbackProvider",
3262            "https://example.com",
3263            Some("k"),
3264            AuthStyle::Bearer,
3265        );
3266        let caps = <OpenAiCompatibleProvider as Provider>::capabilities(&p);
3267        assert!(caps.native_tool_calling);
3268        assert!(!caps.vision);
3269        assert!(p.user_agent.is_none());
3270    }
3271
3272    #[test]
3273    fn to_message_content_converts_image_markers_to_openai_parts() {
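        // [IMAGE:...] markers in user content expand into OpenAI text + image_url parts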
3274        let content = "Describe this\n\n[IMAGE:data:image/png;base64,abcd]";
3275        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3276            "user", content, true,
3277        ))
3278        .unwrap();
3279        let parts = value
3280            .as_array()
3281            .expect("multimodal content should be an array");
3282        assert_eq!(parts.len(), 2);
3283        assert_eq!(parts[0]["type"], "text");
3284        assert_eq!(parts[0]["text"], "Describe this");
3285        assert_eq!(parts[1]["type"], "image_url");
3286        assert_eq!(parts[1]["image_url"]["url"], "data:image/png;base64,abcd");
3287    }
3288
3289    #[test]
3290    fn to_message_content_keeps_markers_as_text_when_user_image_parts_disabled() {
3291        let content = "Policy [IMAGE:data:image/png;base64,abcd]";
3292        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3293            "user", content, false,
3294        ))
3295        .unwrap();
3296        assert_eq!(value, serde_json::json!(content));
3297    }
3298
3299    #[test]
3300    fn to_message_content_keeps_plain_text_for_non_user_roles() {
3301        let value = serde_json::to_value(OpenAiCompatibleProvider::to_message_content(
3302            "system",
3303            "You are a helpful assistant.",
3304            true,
3305        ))
3306        .unwrap();
3307        assert_eq!(value, serde_json::json!("You are a helpful assistant."));
3308    }
3309
3310    #[test]
3311    fn tool_specs_convert_to_openai_format() {
3312        let specs = vec![crate::tools::ToolSpec {
3313            name: "shell".to_string(),
3314            description: "Run shell command".to_string(),
3315            parameters: serde_json::json!({
3316                "type": "object",
3317                "properties": {"command": {"type": "string"}},
3318                "required": ["command"]
3319            }),
3320        }];
3321
3322        let tools = OpenAiCompatibleProvider::tool_specs_to_openai_format(&specs);
3323        assert_eq!(tools.len(), 1);
3324        assert_eq!(tools[0]["type"], "function");
3325        assert_eq!(tools[0]["function"]["name"], "shell");
3326        assert_eq!(tools[0]["function"]["description"], "Run shell command");
3327        assert_eq!(tools[0]["function"]["parameters"]["required"][0], "command");
3328    }
3329
3330    #[test]
3331    fn request_serializes_with_tools() {
3332        let tools = vec![serde_json::json!({
3333            "type": "function",
3334            "function": {
3335                "name": "get_weather",
3336                "description": "Get weather for a location",
3337                "parameters": {
3338                    "type": "object",
3339                    "properties": {
3340                        "location": {"type": "string"}
3341                    }
3342                }
3343            }
3344        })];
3345
3346        let req = ApiChatRequest {
3347            model: "test-model".to_string(),
3348            messages: vec![Message {
3349                role: "user".to_string(),
3350                content: MessageContent::Text("What is the weather?".to_string()),
3351            }],
3352            temperature: 0.7,
3353            stream: Some(false),
3354            stream_options: None,
3355            reasoning_effort: None,
3356            tool_stream: None,
3357            tools: Some(tools),
3358            tool_choice: Some("auto".to_string()),
3359            max_tokens: None,
3360        };
3361        let json = serde_json::to_string(&req).unwrap();
3362        assert!(json.contains("\"tools\""));
3363        assert!(json.contains("get_weather"));
3364        assert!(json.contains("\"tool_choice\":\"auto\""));
3365    }
3366
3367    #[test]
3368    fn zai_tool_requests_enable_tool_stream() {
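        // Requests against Z.AI hosts include "tool_stream": true whenever tools are supplied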
3369        let provider = make_provider("zai", "https://api.z.ai/api/paas/v4", None);
3370        let req = ApiChatRequest {
3371            model: "glm-5".to_string(),
3372            messages: vec![Message {
3373                role: "user".to_string(),
3374                content: MessageContent::Text("List /tmp".to_string()),
3375            }],
3376            temperature: 0.7,
3377            stream: Some(false),
3378            stream_options: None,
3379            reasoning_effort: None,
3380            tool_stream: provider.tool_stream_for_tools(true),
3381            tools: Some(vec![serde_json::json!({
3382                "type": "function",
3383                "function": {
3384                    "name": "shell",
3385                    "description": "Run a shell command",
3386                    "parameters": {
3387                        "type": "object",
3388                        "properties": {
3389                            "command": {"type": "string"}
3390                        }
3391                    }
3392                }
3393            })]),
3394            tool_choice: Some("auto".to_string()),
3395            max_tokens: None,
3396        };
3397
3398        let json = serde_json::to_string(&req).unwrap();
3399        assert!(json.contains("\"tool_stream\":true"));
3400    }
3401
3402    #[test]
3403    fn non_zai_tool_requests_omit_tool_stream() {
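        // Other providers never serialize the vendor-specific tool_stream field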
3404        let provider = make_provider("test", "https://api.example.com/v1", None);
3405        let req = ApiChatRequest {
3406            model: "test-model".to_string(),
3407            messages: vec![Message {
3408                role: "user".to_string(),
3409                content: MessageContent::Text("List /tmp".to_string()),
3410            }],
3411            temperature: 0.7,
3412            stream: Some(false),
3413            stream_options: None,
3414            reasoning_effort: None,
3415            tool_stream: provider.tool_stream_for_tools(true),
3416            tools: Some(vec![serde_json::json!({
3417                "type": "function",
3418                "function": {
3419                    "name": "shell",
3420                    "description": "Run a shell command",
3421                    "parameters": {
3422                        "type": "object",
3423                        "properties": {
3424                            "command": {"type": "string"}
3425                        }
3426                    }
3427                }
3428            })]),
3429            tool_choice: Some("auto".to_string()),
3430            max_tokens: None,
3431        };
3432
3433        let json = serde_json::to_string(&req).unwrap();
3434        assert!(!json.contains("\"tool_stream\""));
3435    }
3436
3437    #[test]
3438    fn z_ai_host_enables_tool_stream_for_custom_profiles() {
3439        let provider = make_provider("custom", "https://api.z.ai/api/coding/paas/v4", None);
3440        assert_eq!(provider.tool_stream_for_tools(true), Some(true));
3441    }
3442
3443    #[test]
3444    fn response_with_tool_calls_deserializes() {
3445        let json = r#"{
3446            "choices": [{
3447                "message": {
3448                    "content": null,
3449                    "tool_calls": [{
3450                        "type": "function",
3451                        "function": {
3452                            "name": "get_weather",
3453                            "arguments": "{\"location\":\"London\"}"
3454                        }
3455                    }]
3456                }
3457            }]
3458        }"#;
3459
3460        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3461        let msg = &resp.choices[0].message;
3462        assert!(msg.content.is_none());
3463        let tool_calls = msg.tool_calls.as_ref().unwrap();
3464        assert_eq!(tool_calls.len(), 1);
3465        assert_eq!(
3466            tool_calls[0].function.as_ref().unwrap().name.as_deref(),
3467            Some("get_weather")
3468        );
3469        assert_eq!(
3470            tool_calls[0]
3471                .function
3472                .as_ref()
3473                .unwrap()
3474                .arguments
3475                .as_deref(),
3476            Some("{\"location\":\"London\"}")
3477        );
3478    }
3479
3480    #[test]
3481    fn response_with_multiple_tool_calls() {
3482        let json = r#"{
3483            "choices": [{
3484                "message": {
3485                    "content": "I'll check both.",
3486                    "tool_calls": [
3487                        {
3488                            "type": "function",
3489                            "function": {
3490                                "name": "get_weather",
3491                                "arguments": "{\"location\":\"London\"}"
3492                            }
3493                        },
3494                        {
3495                            "type": "function",
3496                            "function": {
3497                                "name": "get_time",
3498                                "arguments": "{\"timezone\":\"UTC\"}"
3499                            }
3500                        }
3501                    ]
3502                }
3503            }]
3504        }"#;
3505
3506        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3507        let msg = &resp.choices[0].message;
3508        assert_eq!(msg.content.as_deref(), Some("I'll check both."));
3509        let tool_calls = msg.tool_calls.as_ref().unwrap();
3510        assert_eq!(tool_calls.len(), 2);
3511        assert_eq!(
3512            tool_calls[0].function.as_ref().unwrap().name.as_deref(),
3513            Some("get_weather")
3514        );
3515        assert_eq!(
3516            tool_calls[1].function.as_ref().unwrap().name.as_deref(),
3517            Some("get_time")
3518        );
3519    }
3520
3521    #[tokio::test]
3522    async fn chat_with_tools_fails_without_key() {
3523        let p = make_provider("TestProvider", "https://example.com", None);
3524        let messages = vec![ChatMessage::user("hello")];
3528        let tools = vec![serde_json::json!({
3529            "type": "function",
3530            "function": {
3531                "name": "test_tool",
3532                "description": "A test tool",
3533                "parameters": {}
3534            }
3535        })];
3536
3537        let result = p.chat_with_tools(&messages, &tools, "model", 0.7).await;
3538        assert!(result.is_err());
3539        assert!(
3540            result
3541                .unwrap_err()
3542                .to_string()
3543                .contains("TestProvider API key not set")
3544        );
3545    }
3546
3547    #[test]
3548    fn response_with_no_tool_calls_is_none() {
3549        let json = r#"{"choices":[{"message":{"content":"Just text, no tools."}}]}"#;
3550        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3551        let msg = &resp.choices[0].message;
3552        assert_eq!(msg.content.as_deref(), Some("Just text, no tools."));
3553        assert!(msg.tool_calls.is_none());
3554    }
3555
3556    #[test]
3557    fn flatten_system_messages_merges_into_first_user_and_removes_system_roles() {
3558        let messages = vec![
3559            ChatMessage::system("System A"),
3560            ChatMessage::assistant("Earlier assistant turn"),
3561            ChatMessage::system("System B"),
3562            ChatMessage::user("User turn"),
3563            ChatMessage::tool(r#"{"ok":true}"#),
3564        ];
3565
3566        let flattened = OpenAiCompatibleProvider::flatten_system_messages(&messages);
3567        assert_eq!(flattened.len(), 3);
3568        assert_eq!(flattened[0].role, "assistant");
3569        assert_eq!(
3570            flattened[1].content,
3571            "System A\n\nSystem B\n\nUser turn".to_string()
3572        );
3573        assert_eq!(flattened[1].role, "user");
3574        assert_eq!(flattened[2].role, "tool");
3575        assert!(!flattened.iter().any(|m| m.role == "system"));
3576    }
3577
3578    #[test]
3579    fn flatten_system_messages_inserts_synthetic_user_when_no_user_exists() {
3580        let messages = vec![
3581            ChatMessage::assistant("Assistant only"),
3582            ChatMessage::system("Synthetic system"),
3583        ];
3584
3585        let flattened = OpenAiCompatibleProvider::flatten_system_messages(&messages);
3586        assert_eq!(flattened.len(), 2);
3587        assert_eq!(flattened[0].role, "user");
3588        assert_eq!(flattened[0].content, "Synthetic system");
3589        assert_eq!(flattened[1].role, "assistant");
3590    }
3591
3592    #[test]
3593    fn strip_think_tags_removes_multiple_blocks_with_surrounding_text() {
3594        let input = "Answer A <think>hidden 1</think> and B <think>hidden 2</think> done";
3595        let output = strip_think_tags(input);
3596        assert_eq!(output, "Answer A  and B  done");
3597    }
3598
3599    #[test]
3600    fn strip_think_tags_drops_tail_for_unclosed_block() {
3601        let input = "Visible<think>hidden tail";
3602        let output = strip_think_tags(input);
3603        assert_eq!(output, "Visible");
3604    }
3605
3606    // ----------------------------------------------------------
3607    // Reasoning model fallback tests (reasoning_content)
3608    // ----------------------------------------------------------
3609
3610    #[test]
3611    fn reasoning_content_fallback_when_content_empty() {
3612        // Reasoning models (Qwen3, GLM-4) return content: "" with reasoning_content populated
3613        let json = r#"{"choices":[{"message":{"content":"","reasoning_content":"Thinking output here"}}]}"#;
3614        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3615        let msg = &resp.choices[0].message;
3616        assert_eq!(msg.effective_content(), "Thinking output here");
3617    }
3618
3619    #[test]
3620    fn reasoning_content_fallback_when_content_null() {
3621        // Some models may return content: null with reasoning_content
3622        let json =
3623            r#"{"choices":[{"message":{"content":null,"reasoning_content":"Fallback text"}}]}"#;
3624        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3625        let msg = &resp.choices[0].message;
3626        assert_eq!(msg.effective_content(), "Fallback text");
3627    }
3628
3629    #[test]
3630    fn reasoning_content_fallback_when_content_missing() {
3631        // content field absent entirely, reasoning_content present
3632        let json = r#"{"choices":[{"message":{"reasoning_content":"Only reasoning"}}]}"#;
3633        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3634        let msg = &resp.choices[0].message;
3635        assert_eq!(msg.effective_content(), "Only reasoning");
3636    }
3637
3638    #[test]
3639    fn reasoning_content_not_used_when_content_present() {
3640        // Normal model: content populated, reasoning_content should be ignored
3641        let json = r#"{"choices":[{"message":{"content":"Normal response","reasoning_content":"Should be ignored"}}]}"#;
3642        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3643        let msg = &resp.choices[0].message;
3644        assert_eq!(msg.effective_content(), "Normal response");
3645    }
3646
3647    #[test]
3648    fn reasoning_content_used_when_content_only_think_tags() {
3649        let json = r#"{"choices":[{"message":{"content":"<think>secret</think>","reasoning_content":"Fallback text"}}]}"#;
3650        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3651        let msg = &resp.choices[0].message;
3652        assert_eq!(msg.effective_content(), "Fallback text");
3653        assert_eq!(
3654            msg.effective_content_optional().as_deref(),
3655            Some("Fallback text")
3656        );
3657    }
3658
3659    #[test]
3660    fn reasoning_content_both_absent_returns_empty() {
3661        // Neither content nor reasoning_content - returns empty string
3662        let json = r#"{"choices":[{"message":{}}]}"#;
3663        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3664        let msg = &resp.choices[0].message;
3665        assert_eq!(msg.effective_content(), "");
3666    }
3667
3668    #[test]
3669    fn reasoning_content_absent_for_normal_models() {
3670        // Standard response without reasoning_content still works
3671        let json = r#"{"choices":[{"message":{"content":"Hello from Venice!"}}]}"#;
3672        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3673        let msg = &resp.choices[0].message;
3674        assert!(msg.reasoning_content.is_none());
3675        assert_eq!(msg.effective_content(), "Hello from Venice!");
3676    }
3677
3678    // ----------------------------------------------------------
3679    // SSE streaming tests: reasoning_content fallback, tool-call deltas, usage
3680    // ----------------------------------------------------------
3681
3682    #[test]
3683    fn parse_sse_line_with_content() {
3684        let line = r#"data: {"choices":[{"delta":{"content":"hello"}}]}"#;
3685        let result = parse_sse_line(line).unwrap().unwrap();
3686        assert_eq!(result.delta, "hello");
3687        assert!(result.reasoning.is_none());
3688    }
3689
3690    #[test]
3691    fn parse_sse_line_with_reasoning_content() {
3692        let line = r#"data: {"choices":[{"delta":{"reasoning_content":"thinking..."}}]}"#;
3693        let result = parse_sse_line(line).unwrap().unwrap();
3694        assert!(result.delta.is_empty());
3695        assert_eq!(result.reasoning.as_deref(), Some("thinking..."));
3696    }
3697
3698    #[test]
3699    fn parse_sse_line_with_both_prefers_content() {
3700        let line = r#"data: {"choices":[{"delta":{"content":"real answer","reasoning_content":"thinking..."}}]}"#;
3701        let result = parse_sse_line(line).unwrap().unwrap();
3702        assert_eq!(result.delta, "real answer");
3703        assert!(result.reasoning.is_none());
3704    }
3705
3706    #[test]
3707    fn parse_sse_line_with_empty_content_falls_back_to_reasoning() {
3708        let line =
3709            r#"data: {"choices":[{"delta":{"content":"","reasoning_content":"thinking..."}}]}"#;
3710        let result = parse_sse_line(line).unwrap().unwrap();
3711        assert!(result.delta.is_empty());
3712        assert_eq!(result.reasoning.as_deref(), Some("thinking..."));
3713    }
3714
3715    #[test]
3716    fn parse_sse_line_done_sentinel() {
3717        let line = "data: [DONE]";
3718        let result = parse_sse_line(line).unwrap();
3719        assert!(result.is_none());
3720    }
3721
3722    #[test]
3723    fn parse_sse_chunk_with_tool_call_delta() {
3724        let line = r#"data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","function":{"name":"shell","arguments":"{\"command\":\"date\"}"}}]}}]}"#;
3725        let chunk = parse_sse_chunk(line)
3726            .unwrap()
3727            .expect("chunk should be parsed");
3728        let choice = chunk.choices.first().expect("choice should exist");
3729        let tool_calls = choice
3730            .delta
3731            .tool_calls
3732            .as_ref()
3733            .expect("tool call deltas should exist");
3734        assert_eq!(tool_calls.len(), 1);
3735        assert_eq!(tool_calls[0].index, Some(0));
3736        assert_eq!(tool_calls[0].id.as_deref(), Some("call_1"));
3737        assert_eq!(
3738            tool_calls[0]
3739                .function
3740                .as_ref()
3741                .and_then(|function| function.name.as_deref()),
3742            Some("shell")
3743        );
3744    }
3745
3746    #[test]
3747    fn stream_tool_call_accumulator_combines_deltas() {
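        // Partial deltas (id and name first, argument fragments later) are stitched into one complete tool call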
3748        let mut acc = StreamToolCallAccumulator::default();
3749        acc.apply_delta(&StreamToolCallDelta {
3750            index: Some(0),
3751            id: Some("call_1".to_string()),
3752            function: Some(StreamFunctionDelta {
3753                name: Some("shell".to_string()),
3754                arguments: Some("{\"command\":\"".to_string()),
3755            }),
3756            name: None,
3757            arguments: None,
3758        });
3759        acc.apply_delta(&StreamToolCallDelta {
3760            index: Some(0),
3761            id: None,
3762            function: Some(StreamFunctionDelta {
3763                name: None,
3764                arguments: Some("date\"}".to_string()),
3765            }),
3766            name: None,
3767            arguments: None,
3768        });
3769
3770        let tool_call = acc
3771            .into_provider_tool_call()
3772            .expect("accumulator should emit tool call");
3773        assert_eq!(tool_call.id, "call_1");
3774        assert_eq!(tool_call.name, "shell");
3775        assert_eq!(tool_call.arguments, r#"{"command":"date"}"#);
3776    }
3777
3778    #[test]
3779    fn api_response_parses_usage() {
3780        let json = r#"{
3781            "choices": [{"message": {"content": "Hello"}}],
3782            "usage": {"prompt_tokens": 150, "completion_tokens": 60}
3783        }"#;
3784        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3785        let usage = resp.usage.unwrap();
3786        assert_eq!(usage.prompt_tokens, Some(150));
3787        assert_eq!(usage.completion_tokens, Some(60));
3788    }
3789
3790    #[test]
3791    fn api_response_parses_without_usage() {
3792        let json = r#"{"choices": [{"message": {"content": "Hello"}}]}"#;
3793        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
3794        assert!(resp.usage.is_none());
3795    }
3796
3797    // ═══════════════════════════════════════════════════════════════════════
3798    // reasoning_content pass-through tests
3799    // ═══════════════════════════════════════════════════════════════════════
3800
3801    #[test]
3802    fn parse_native_response_captures_reasoning_content() {
3803        let message = ResponseMessage {
3804            content: Some("answer".to_string()),
3805            reasoning_content: Some("thinking step".to_string()),
3806            tool_calls: Some(vec![ToolCall {
3807                id: Some("call_1".to_string()),
3808                kind: Some("function".to_string()),
3809                function: Some(Function {
3810                    name: Some("shell".to_string()),
3811                    arguments: Some(r#"{"cmd":"ls"}"#.to_string()),
3812                }),
3813                name: None,
3814                arguments: None,
3815                parameters: None,
3816            }]),
3817        };
3818
3819        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3820        assert_eq!(parsed.reasoning_content.as_deref(), Some("thinking step"));
3821        assert_eq!(parsed.text.as_deref(), Some("answer"));
3822        assert_eq!(parsed.tool_calls.len(), 1);
3823    }
3824
3825    #[test]
3826    fn parse_native_response_none_reasoning_content_for_normal_model() {
3827        let message = ResponseMessage {
3828            content: Some("hello".to_string()),
3829            reasoning_content: None,
3830            tool_calls: None,
3831        };
3832
3833        let parsed = OpenAiCompatibleProvider::parse_native_response(message);
3834        assert!(parsed.reasoning_content.is_none());
3835        assert_eq!(parsed.text.as_deref(), Some("hello"));
3836    }
3837
3838    #[test]
3839    fn convert_messages_for_native_round_trips_reasoning_content() {
3840        // Simulate stored assistant history JSON that includes reasoning_content
3841        let history_json = serde_json::json!({
3842            "content": "I will check",
3843            "tool_calls": [{
3844                "id": "tc_1",
3845                "name": "shell",
3846                "arguments": "{\"cmd\":\"ls\"}"
3847            }],
3848            "reasoning_content": "Let me think about this..."
3849        });
3850
3851        let messages = vec![ChatMessage::assistant(history_json.to_string())];
3852        let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages, true);
3853        assert_eq!(native.len(), 1);
3854        assert_eq!(native[0].role, "assistant");
3855        assert_eq!(
3856            native[0].reasoning_content.as_deref(),
3857            Some("Let me think about this...")
3858        );
3859        assert!(native[0].tool_calls.is_some());
3860    }
3861
3862    #[test]
3863    fn convert_messages_for_native_no_reasoning_content_when_absent() {
3864        // Normal model history without reasoning_content key
3865        let history_json = serde_json::json!({
3866            "content": "I will check",
3867            "tool_calls": [{
3868                "id": "tc_1",
3869                "name": "shell",
3870                "arguments": "{\"cmd\":\"ls\"}"
3871            }]
3872        });
3873
3874        let messages = vec![ChatMessage::assistant(history_json.to_string())];
3875        let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages, true);
3876        assert_eq!(native.len(), 1);
3877        assert!(native[0].reasoning_content.is_none());
3878    }
3879
3880    #[test]
3881    fn convert_messages_for_native_reasoning_content_serialized_only_when_present() {
3882        // Verify skip_serializing_if works: reasoning_content omitted from JSON when None
3883        let msg_without = NativeMessage {
3884            role: "assistant".to_string(),
3885            content: Some(MessageContent::Text("hi".to_string())),
3886            tool_call_id: None,
3887            tool_calls: None,
3888            reasoning_content: None,
3889        };
3890        let json = serde_json::to_string(&msg_without).unwrap();
3891        assert!(
3892            !json.contains("reasoning_content"),
3893            "reasoning_content should be omitted when None"
3894        );
3895
3896        let msg_with = NativeMessage {
3897            role: "assistant".to_string(),
3898            content: Some(MessageContent::Text("hi".to_string())),
3899            tool_call_id: None,
3900            tool_calls: None,
3901            reasoning_content: Some("thinking...".to_string()),
3902        };
3903        let json = serde_json::to_string(&msg_with).unwrap();
3904        assert!(
3905            json.contains("reasoning_content"),
3906            "reasoning_content should be present when Some"
3907        );
3908        assert!(json.contains("thinking..."));
3909    }
3910
3911    #[test]
3912    fn default_timeout_is_120s() {
3913        let p = make_provider("test", "https://example.com", None);
3914        assert_eq!(p.timeout_secs, 120);
3915    }
3916
3917    #[test]
3918    fn with_timeout_secs_overrides_default() {
3919        let p = make_provider("test", "https://example.com", None).with_timeout_secs(300);
3920        assert_eq!(p.timeout_secs, 300);
3921    }
3922
3923    #[test]
3924    fn extra_headers_default_empty() {
3925        let p = make_provider("test", "https://example.com", None);
3926        assert!(p.extra_headers.is_empty());
3927    }
3928
3929    #[test]
3930    fn with_extra_headers_sets_headers() {
3931        let mut headers = std::collections::HashMap::new();
3932        headers.insert("X-Title".to_string(), "construct".to_string());
3933        headers.insert(
3934            "HTTP-Referer".to_string(),
3935            "https://example.com".to_string(),
3936        );
3937        let p = make_provider("test", "https://example.com", None).with_extra_headers(headers);
3938        assert_eq!(p.extra_headers.len(), 2);
3939        assert_eq!(p.extra_headers.get("X-Title").unwrap(), "construct");
3940        assert_eq!(
3941            p.extra_headers.get("HTTP-Referer").unwrap(),
3942            "https://example.com"
3943        );
3944    }
3945
3946    #[test]
3947    fn http_client_with_extra_headers_builds_successfully() {
3948        let mut headers = std::collections::HashMap::new();
3949        headers.insert("X-Title".to_string(), "construct".to_string());
3950        headers.insert("User-Agent".to_string(), "TestAgent/1.0".to_string());
3951        let p = make_provider("test", "https://example.com", None).with_extra_headers(headers);
3952        // Should not panic
3953        let _client = p.http_client();
3954    }
3955
3956    #[test]
3957    fn http_client_without_extra_headers_or_user_agent() {
3958        let p = make_provider("test", "https://example.com", None);
3959        // Should use the cached proxy client path
3960        let _client = p.http_client();
3961    }
3962
3963    #[test]
3964    fn extra_headers_combined_with_user_agent() {
3965        let mut headers = std::collections::HashMap::new();
3966        headers.insert("X-Title".to_string(), "construct".to_string());
3967        let p = OpenAiCompatibleProvider::new_with_user_agent(
3968            "test",
3969            "https://example.com",
3970            None,
3971            AuthStyle::Bearer,
3972            "CustomAgent/1.0",
3973        )
3974        .with_extra_headers(headers);
3975        assert_eq!(p.user_agent.as_deref(), Some("CustomAgent/1.0"));
3976        assert_eq!(p.extra_headers.len(), 1);
3977        // Should not panic
3978        let _client = p.http_client();
3979    }
3980
3981    #[test]
3982    fn tool_call_none_fields_omitted_from_json() {
3983        // Ensures providers like Mistral that reject extra fields (e.g. "name": null)
3984        // don't receive them when the ToolCall compat fields are None.
3985        let tc = ToolCall {
3986            id: Some("call_1".to_string()),
3987            kind: Some("function".to_string()),
3988            function: Some(Function {
3989                name: Some("shell".to_string()),
3990                arguments: Some("{\"command\":\"ls\"}".to_string()),
3991            }),
3992            name: None,
3993            arguments: None,
3994            parameters: None,
3995        };
3996        let json = serde_json::to_value(&tc).unwrap();
3997        assert!(!json.as_object().unwrap().contains_key("name"));
3998        assert!(!json.as_object().unwrap().contains_key("arguments"));
3999        assert!(!json.as_object().unwrap().contains_key("parameters"));
4000        // Standard fields must be present
4001        assert!(json.as_object().unwrap().contains_key("id"));
4002        assert!(json.as_object().unwrap().contains_key("type"));
4003        assert!(json.as_object().unwrap().contains_key("function"));
4004    }
4005
4006    #[test]
4007    fn tool_call_with_compat_fields_serializes_them() {
4008        // When compat fields are Some, they should appear in the output.
4009        let tc = ToolCall {
4010            id: None,
4011            kind: None,
4012            function: None,
4013            name: Some("shell".to_string()),
4014            arguments: Some("{\"command\":\"ls\"}".to_string()),
4015            parameters: None,
4016        };
4017        let json = serde_json::to_value(&tc).unwrap();
4018        assert_eq!(json["name"], "shell");
4019        assert_eq!(json["arguments"], "{\"command\":\"ls\"}");
4020        // None fields should be omitted
4021        assert!(!json.as_object().unwrap().contains_key("id"));
4022        assert!(!json.as_object().unwrap().contains_key("type"));
4023        assert!(!json.as_object().unwrap().contains_key("function"));
4024        assert!(!json.as_object().unwrap().contains_key("parameters"));
4025    }
4026
4027    // ── parse_proxy_tool_event tests ──
4028
4029    #[test]
4030    fn proxy_tool_start_valid() {
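        // A well-formed x_tool_start payload maps to a PreExecutedToolCall stream event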
4031        let line = r#"data: {"x_tool_start":{"name":"bash","arguments":"{\"cmd\":\"ls\"}"}}"#;
4032        let event = parse_proxy_tool_event(line);
4033        assert!(matches!(
4034            event,
4035            Some(StreamEvent::PreExecutedToolCall { ref name, ref args })
4036            if name == "bash" && args == r#"{"cmd":"ls"}"#
4037        ));
4038    }
4039
4040    #[test]
4041    fn proxy_tool_start_missing_name_returns_none() {
4042        let line = r#"data: {"x_tool_start":{"arguments":"{}"}}"#;
4043        assert!(parse_proxy_tool_event(line).is_none());
4044    }
4045
4046    #[test]
4047    fn proxy_tool_start_missing_arguments_defaults() {
4048        let line = r#"data: {"x_tool_start":{"name":"read"}}"#;
4049        let event = parse_proxy_tool_event(line);
4050        assert!(matches!(
4051            event,
4052            Some(StreamEvent::PreExecutedToolCall { ref name, ref args })
4053            if name == "read" && args == "{}"
4054        ));
4055    }
4056
4057    #[test]
4058    fn proxy_tool_result_valid() {
4059        let line = r#"data: {"x_tool_result":{"name":"bash","output":"hello world"}}"#;
4060        let event = parse_proxy_tool_event(line);
4061        assert!(matches!(
4062            event,
4063            Some(StreamEvent::PreExecutedToolResult { ref name, ref output })
4064            if name == "bash" && output == "hello world"
4065        ));
4066    }
4067
4068    #[test]
4069    fn proxy_tool_result_missing_fields_uses_defaults() {
4070        let line = r#"data: {"x_tool_result":{}}"#;
4071        let event = parse_proxy_tool_event(line);
4072        assert!(matches!(
4073            event,
4074            Some(StreamEvent::PreExecutedToolResult { ref name, ref output })
4075            if name == "unknown" && output.is_empty()
4076        ));
4077    }
4078
4079    #[test]
4080    fn proxy_tool_event_non_json_returns_none() {
4081        assert!(parse_proxy_tool_event("data: not json").is_none());
4082    }
4083
4084    #[test]
4085    fn proxy_tool_event_no_data_prefix_returns_none() {
4086        let line = r#"{"x_tool_start":{"name":"bash"}}"#;
4087        assert!(parse_proxy_tool_event(line).is_none());
4088    }
4089
4090    #[test]
4091    fn proxy_tool_event_standard_openai_chunk_returns_none() {
4092        let line = r#"data: {"id":"chatcmpl-1","choices":[{"delta":{"content":"hi"}}]}"#;
4093        assert!(parse_proxy_tool_event(line).is_none());
4094    }
4095
4096    #[test]
4097    fn proxy_tool_event_done_sentinel_returns_none() {
4098        assert!(parse_proxy_tool_event("data: [DONE]").is_none());
4099    }
4100}