Skip to main content

phi_core/provider/
openai_compat.rs

1//! OpenAI Chat Completions compatible provider.
2//!
3//! One implementation covers OpenAI, xAI, Groq, Cerebras, OpenRouter,
4//! Mistral, DeepSeek, MiniMax, HuggingFace, Kimi, and any other provider
5//! that implements the OpenAI Chat Completions API.
6//!
7//! Behavioral differences are handled via `OpenAiCompat` flags in ModelConfig.
8/*
9ARCHITECTURE: OpenAiCompatProvider — one implementation, 15+ providers
10
11The OpenAI Chat Completions API is a de facto industry standard. Rather than
12writing separate providers for OpenAI, Groq, DeepSeek, etc., we write ONE provider
13that reads `OpenAiCompat` flags from `ModelConfig` to handle per-provider quirks:
14
15  - `supports_developer_role`           → use "developer" instead of "system" role
16  - `max_tokens_field`                  → use "max_completion_tokens" vs "max_tokens"
17  - `supports_reasoning_effort`         → send `reasoning_effort: "high/medium/low"`
18  - `requires_tool_result_name`         → include `name` field in tool results
19  - `thinking_format`                   → parse reasoning from `reasoning` or `reasoning_content`
20
21Adding a new OpenAI-compatible provider requires ONLY:
22  1. A new `OpenAiCompat::new_provider()` factory in model.rs
23  2. A `ModelConfig::new_provider(id, name)` factory in model.rs
24  No new provider files needed.
25
26ARCHITECTURE: Tool call buffering vs Anthropic's content_block approach
27
28OpenAI and Anthropic stream tool calls differently:
29  Anthropic: explicit start/delta/stop events with `content_index`
30  OpenAI:    tool calls appear as `delta.tool_calls[N]` fragments across chunks;
31             each chunk may have multiple tool call deltas; `index` identifies which
32
33We buffer OpenAI tool calls in `ToolCallBuffer` (scratch pads), then push them
34into `content` as complete `Content::ToolCall` values at stream end.
35This two-phase approach avoids partial JSON being visible to the agent loop.
36
37ARCHITECTURE: `[DONE]` sentinel
38
39OpenAI streams end with a special SSE data payload `[DONE]` (not valid JSON).
40The provider explicitly checks for this and breaks the loop rather than
41trying to parse it as JSON.
42*/
43
44use super::model::{MaxTokensField, ModelConfig, OpenAiCompat, ThinkingFormat};
45use super::traits::*;
46use crate::types::*;
47use async_trait::async_trait;
48use futures::StreamExt;
49use reqwest_eventsource::EventSource;
50use serde::Deserialize;
51use tokio::sync::mpsc;
52use tracing::{debug, warn};
53
/// Provider for backends that speak the OpenAI Chat Completions API.
///
/// Unit struct — it carries no state. All logic lives in its `StreamProvider` impl.
pub struct OpenAiCompatProvider;
56
57#[async_trait]
58impl StreamProvider for OpenAiCompatProvider {
59    fn provider_id(&self) -> &str {
60        "openai"
61    }
62
63    async fn stream(
64        &self,
65        config: StreamConfig, // REQUEST — model_config.base_url determines which of 15+ providers to hit
66        tx: mpsc::UnboundedSender<StreamEvent>, // OBSERVER — receives SSE events ([DONE] terminates the stream)
67        cancel: tokio_util::sync::CancellationToken, // ABORT — races against SSE stream
68    ) -> Result<Message, ProviderError> {
69        let model_config = &config.model_config;
70        /*
71        RUST QUIRK: `.as_ref().cloned().unwrap_or_default()`
72        `model_config.compat` is `Option<OpenAiCompat>`.
73          `.as_ref()` → `Option<&OpenAiCompat>` (avoid moving out of model_config)
74          `.cloned()` → `Option<OpenAiCompat>` (clone the inner value if Some)
75          `.unwrap_or_default()` → `OpenAiCompat` (use `Default::default()` if None)
76        Result: an owned `OpenAiCompat` with appropriate flags for this provider,
77        defaulting to conservative "generic OpenAI-compat" if compat wasn't specified.
78        */
79        let compat = model_config.compat.as_ref().cloned().unwrap_or_default();
80        // Resolve via CredentialProvider when set, else use the static `api_key`.
81        let api_key = model_config.resolve_api_key().await?;
82
83        let base_url = &model_config.base_url;
84        // Append the endpoint path — base_url is like "https://api.openai.com/v1" (no trailing slash)
85        let url = format!("{}/chat/completions", base_url);
86
87        let body = build_request_body(&config, model_config, &compat);
88        debug!(
89            "OpenAI compat request: model={} url={}",
90            config.model_config.id, url
91        );
92
93        let client = reqwest::Client::new();
94        let mut request = client
95            .post(&url)
96            .header("content-type", "application/json")
97            .header("authorization", format!("Bearer {}", api_key));
98
99        // Add any extra headers from model config
100        for (k, v) in &model_config.headers {
101            request = request.header(k, v);
102        }
103
104        let request = request.json(&body);
105
106        let mut es =
107            EventSource::new(request).map_err(|e| ProviderError::Network(e.to_string()))?;
108
109        /*
110        ARCHITECTURE: Streaming accumulators
111
112        Unlike Anthropic (which has explicit content_block_start events), OpenAI
113        "discovers" content blocks dynamically as deltas arrive:
114          - First delta with `content: Some(text)` → create a text block
115          - First delta with `reasoning_content` → create a thinking block
116          - First delta with `tool_calls[N]` → create buffer N in tool_call_buffers
117
118        `tool_call_buffers` accumulates partial tool call JSON (id/name/arguments fragments)
119        until the stream ends, then we convert them to `Content::ToolCall` values.
120        */
121        let mut content: Vec<Content> = Vec::new();
122        let mut usage = Usage::default();
123        let mut stop_reason = StopReason::Stop;
124        let mut tool_call_buffers: Vec<ToolCallBuffer> = Vec::new(); // scratch pads for partial tool calls
125
126        let _ = tx.send(StreamEvent::Start);
127
128        loop {
129            tokio::select! {
130                _ = cancel.cancelled() => {
131                    es.close();
132                    return Err(ProviderError::Cancelled);
133                }
134                event = es.next() => {
135                    match event {
136                        None => break,
137                        Some(Ok(reqwest_eventsource::Event::Open)) => {}
138                        Some(Ok(reqwest_eventsource::Event::Message(msg))) => {
139                            // OpenAI signals stream end with "[DONE]" (not valid JSON — check first)
140                            if msg.data == "[DONE]" {
141                                break;
142                            }
143
144                            /*
145                            RUST QUIRK: `match serde_json::from_str(...) { Ok(c) => c, Err => continue }`
146
147                            This is a `match` used as an expression that short-circuits on error.
148                            `continue` jumps back to the top of the `loop` (skipping this chunk).
149                            Some providers send non-JSON lines (e.g. comments) in their SSE stream;
150                            we log and ignore them rather than failing the whole stream.
151
152                            This is more flexible than `?` (which would return from the whole function).
153                            */
154                            let chunk: OpenAiChunk = match serde_json::from_str(&msg.data) {
155                                Ok(c) => c,
156                                Err(e) => {
157                                    debug!("Failed to parse OpenAI chunk: {} data={}", e, &msg.data);
158                                    continue; // skip this event, keep processing the stream
159                                }
160                            };
161
162                            // Usage appears in some chunks (varies by provider — not always the last)
163                            if let Some(u) = &chunk.usage {
164                                usage.input = u.prompt_tokens;
165                                usage.output = u.completion_tokens;
166                                usage.total_tokens = u.total_tokens;
167                                if let Some(details) = &u.prompt_tokens_details {
168                                    usage.cache_read = details.cached_tokens; // prompt cache hits
169                                }
170                                if let Some(details) = &u.completion_tokens_details {
171                                    usage.reasoning = details.reasoning_tokens; // o-series reasoning
172                                }
173                            }
174
175                            for choice in &chunk.choices {
176                                let delta = &choice.delta;
177
178                                /*
179                                ARCHITECTURE: Thinking format dispatch
180
181                                Different providers use different field names for chain-of-thought:
182                                  xAI (Grok):     `delta.reasoning`
183                                  OpenRouter:     `delta.reasoning_details` (array of {type, text})
184                                  OpenAI/others:  `delta.reasoning_content`
185
186                                `ThinkingFormat::Xai`        → read from `delta.reasoning`
187                                `ThinkingFormat::OpenRouter` → collect `delta.reasoning_details`
188                                                               entries where type == "thinking"
189                                all others                   → read from `delta.reasoning_content`
190
191                                RUST QUIRK: `.as_deref()` on `Option<String>` → `Option<&str>`
192                                  `delta.reasoning` is `Option<String>`.
193                                  `.as_deref()` borrows the inner String as `&str`.
194                                  Result: `Option<&str>` — `None` if the field was absent,
195                                  `Some("thinking text")` if it had content.
196                                */
197                                // Owned string is needed for OpenRouter (assembled from array);
198                                // other formats borrow directly from delta fields.
199                                // `reasoning_owned` anchors the String so `reasoning` (&str) can borrow it.
200                                let reasoning_owned = match compat.thinking_format {
201                                    ThinkingFormat::OpenRouter => {
202                                        delta.reasoning_details.as_ref().map(|details| {
203                                            details
204                                                .iter()
205                                                .filter(|d| d.detail_type == "thinking")
206                                                .filter_map(|d| d.text.as_deref())
207                                                .collect::<String>()
208                                        })
209                                    }
210                                    _ => None,
211                                };
212                                let reasoning = match compat.thinking_format {
213                                    ThinkingFormat::Xai => delta.reasoning.as_deref(),
214                                    ThinkingFormat::OpenRouter => reasoning_owned.as_deref(),
215                                    _ => delta.reasoning_content.as_deref(),
216                                };
217                                if let Some(reasoning_text) = reasoning {
218                                    // Find existing thinking block or create a new one
219                                    let thinking_idx = content.iter().position(|c| matches!(c, Content::Thinking { .. }));
220                                    let idx = match thinking_idx {
221                                        Some(i) => i,
222                                        None => {
223                                            content.push(Content::Thinking { thinking: String::new(), signature: None });
224                                            content.len() - 1
225                                        }
226                                    };
227                                    if let Some(Content::Thinking { thinking, .. }) = content.get_mut(idx) {
228                                        thinking.push_str(reasoning_text);
229                                    }
230                                    let _ = tx.send(StreamEvent::ThinkingDelta {
231                                        content_index: idx,
232                                        delta: reasoning_text.to_string(),
233                                    });
234                                }
235
236                                // Text content — find or create a text block
237                                if let Some(text) = &delta.content {
238                                    let text_idx = content.iter().position(|c| matches!(c, Content::Text { .. }));
239                                    let idx = match text_idx {
240                                        Some(i) => i,
241                                        None => {
242                                            content.push(Content::Text { text: String::new() });
243                                            content.len() - 1
244                                        }
245                                    };
246                                    if let Some(Content::Text { text: t }) = content.get_mut(idx) {
247                                        t.push_str(text);
248                                    }
249                                    let _ = tx.send(StreamEvent::TextDelta {
250                                        content_index: idx,
251                                        delta: text.clone(),
252                                    });
253                                }
254
255                                /*
256                                ARCHITECTURE: Tool call buffering
257
258                                OpenAI streams tool calls as partial JSON fragments across
259                                multiple chunks. Each `tc` (tool call delta) has:
260                                  `tc.index` — which parallel tool call this belongs to
261                                  `tc.id`    — call ID (only in the first delta for this index)
262                                  `tc.function.name` — only in first delta
263                                  `tc.function.arguments` — partial JSON, streamed across many chunks
264
265                                We maintain `tool_call_buffers[tc_index]` as scratch pads.
266                                `while tool_call_buffers.len() <= tc_index { push empty buffer }`
267                                — ensures the buffer exists before indexing.
268
269                                RUST QUIRK: `buf.name.clone_from(name)` vs `buf.name = name.clone()`
270                                `.clone_from(&src)` reuses the existing String allocation if possible
271                                (it calls `String::replace_range` internally). Slightly more efficient
272                                than `.clone()` when the target already has capacity.
273                                */
274                                if let Some(tool_calls) = &delta.tool_calls {
275                                    for tc in tool_calls {
276                                        let tc_index = tc.index as usize;
277                                        while tool_call_buffers.len() <= tc_index {
278                                            tool_call_buffers.push(ToolCallBuffer::default());
279                                        }
280                                        let buf = &mut tool_call_buffers[tc_index];
281                                        if let Some(id) = &tc.id {
282                                            buf.id = id.clone();
283                                        }
284                                        if let Some(f) = &tc.function {
285                                            if let Some(name) = &f.name {
286                                                buf.name.clone_from(name);
287                                                let _ = tx.send(StreamEvent::ToolCallStart {
288                                                    content_index: content.len() + tc_index,
289                                                    id: buf.id.clone(),
290                                                    name: name.clone(),
291                                                });
292                                            }
293                                            if let Some(args) = &f.arguments {
294                                                buf.arguments.push_str(args);
295                                                let _ = tx.send(StreamEvent::ToolCallDelta {
296                                                    content_index: content.len() + tc_index,
297                                                    delta: args.clone(),
298                                                });
299                                            }
300                                        }
301                                    }
302                                }
303
304                                // `finish_reason` signals why the response stopped generating
305                                if let Some(reason) = &choice.finish_reason {
306                                    stop_reason = match reason.as_str() {
307                                        "stop" => StopReason::Stop,
308                                        "length" => StopReason::Length,
309                                        "tool_calls" => StopReason::ToolUse,
310                                        _ => StopReason::Stop,
311                                    };
312                                }
313                            }
314                        }
315                        Some(Err(e)) => {
316                            let err_str = e.to_string();
317                            warn!("OpenAI SSE error: {}", err_str);
318
319                            // Classify retryable HTTP errors so the retry loop can handle them.
320                            // reqwest-eventsource renders InvalidStatusCode as
321                            // "Invalid status code: {code} {reason}" (e.g. "Invalid status code: 429 Too Many Requests").
322                            // We match on "status code: NNN" for precision, with broader fallbacks
323                            // for error messages from other sources (e.g. provider JSON error bodies).
324                            let err_lower = err_str.to_lowercase();
325                            if err_lower.contains("status code: 429")
326                                || err_lower.contains("rate limit")
327                                || err_lower.contains("rate-limit")
328                            {
329                                return Err(ProviderError::RateLimited { retry_after_ms: None });
330                            }
331                            if err_lower.contains("status code: 502")
332                                || err_lower.contains("status code: 503")
333                                || err_lower.contains("status code: 504")
334                            {
335                                return Err(ProviderError::Network(err_str));
336                            }
337
338                            let err_msg = Message::Assistant {
339                                content: vec![Content::Text { text: String::new() }],
340                                stop_reason: StopReason::Error,
341                                model: config.model_config.id.clone(),
342                                provider: model_config.provider.clone(),
343                                usage: usage.clone(),
344                                timestamp: now_ms(),
345                                error_message: Some(err_str),
346                            };
347                            let _ = tx.send(StreamEvent::Error { message: err_msg.clone() });
348                            return Ok(err_msg);
349                        }
350                    }
351                }
352            }
353        }
354
355        /*
356        ARCHITECTURE: Finalizing tool calls after stream end
357
358        Only after the stream ends do we have complete JSON for each tool call.
359        We parse `buf.arguments` (the accumulated raw JSON string) and push a
360        `Content::ToolCall` for each buffer.
361
362        RUST QUIRK: `.unwrap_or(serde_json::Value::Object(Default::default()))`
363        If `serde_json::from_str` fails (malformed JSON), we fall back to an
364        empty JSON object `{}`. This is defensive: the agent loop should receive
365        a ToolCall even if it has empty arguments, so it can report the parsing
366        failure as a tool execution error rather than crashing the whole stream.
367        `Default::default()` for `serde_json::Map<...>` is an empty map — so
368        `serde_json::Value::Object(Default::default())` builds `{}`.
369        */
370        for buf in &tool_call_buffers {
371            let args = serde_json::from_str(&buf.arguments)
372                .unwrap_or(serde_json::Value::Object(Default::default()));
373            content.push(Content::ToolCall {
374                id: buf.id.clone(),
375                name: buf.name.clone(),
376                arguments: args,
377            });
378            let _ = tx.send(StreamEvent::ToolCallEnd {
379                content_index: content.len() - 1,
380            });
381        }
382
383        if !tool_call_buffers.is_empty() {
384            stop_reason = StopReason::ToolUse;
385        }
386
387        let message = Message::Assistant {
388            content,
389            stop_reason,
390            model: config.model_config.id.clone(),
391            provider: model_config.provider.clone(),
392            usage,
393            timestamp: now_ms(),
394            error_message: None,
395        };
396
397        let _ = tx.send(StreamEvent::Done {
398            message: message.clone(),
399        });
400        Ok(message)
401    }
402}
403
/// Scratch pad for accumulating a single streaming tool call across many SSE chunks.
///
/// RUST QUIRK: `#[derive(Default)]` on a struct with String fields —
/// `String::default()` is the empty string, so `ToolCallBuffer::default()` yields
/// `{ id: "", name: "", arguments: "" }`. New scratch pads can therefore be created
/// without writing out every field by hand.
#[derive(Default)]
struct ToolCallBuffer {
    id: String,        // tool call ID (overwritten whenever a delta carries one)
    name: String,      // function name (overwritten whenever a delta carries one)
    arguments: String, // raw JSON arguments text, appended fragment by fragment
}
418
419/// Builds the JSON request body for the OpenAI Chat Completions API.
420/*
421ARCHITECTURE: build_request_body — translation layer (yo-core types → OpenAI JSON)
422
423Converts our internal `StreamConfig` into the OpenAI-compatible JSON body.
424The `compat` flags control which variant of the API to use:
425  - System/developer role
426  - `max_tokens` vs `max_completion_tokens`
427  - `reasoning_effort` parameter for thinking-capable models
428  - Tool result `name` field (required by some providers)
429  - Image format (inline base64 vs URL reference)
430
431RUST QUIRK: `let role = if ... { "developer" } else { "system" }` — if as an expression
432  Unlike Python/Java where `if` is a statement, Rust's `if` is an EXPRESSION — it
433  evaluates to a value. Both branches must have the same type (here: `&str`).
434  Python analogy: `role = "developer" if compat.supports_developer_role else "system"`
435*/
436fn build_request_body(
437    config: &StreamConfig, // REQUEST — messages, tools, model, system prompt, cache config
438    model_config: &ModelConfig, // ROUTING — carries base_url (which provider) and api_key
439    compat: &OpenAiCompat, // QUIRK FLAGS — per-provider behavior switches (store, dev role, reasoning format, etc.)
440) -> serde_json::Value {
441    let mut messages: Vec<serde_json::Value> = Vec::new();
442
443    // System prompt — role depends on whether this provider uses "developer" vs "system"
444    if !config.system_prompt.is_empty() {
445        let role = if compat.supports_developer_role {
446            "developer" // OpenAI o-series models use "developer" for system-level instructions
447        } else {
448            "system" // Standard role for most other providers
449        };
450        messages.push(serde_json::json!({
451            "role": role,
452            "content": config.system_prompt,
453        }));
454    }
455
456    for msg in &config.messages {
457        match msg {
458            Message::User { content, .. } => {
459                messages.push(serde_json::json!({
460                    "role": "user",
461                    "content": content_to_openai(content),
462                }));
463            }
464            Message::Assistant { content, .. } => {
465                let mut parts: Vec<serde_json::Value> = Vec::new();
466                let mut tool_calls: Vec<serde_json::Value> = Vec::new();
467
468                for c in content {
469                    match c {
470                        Content::Text { text } => {
471                            parts.push(serde_json::json!({"type": "text", "text": text}));
472                        }
473                        Content::ToolCall {
474                            id,
475                            name,
476                            arguments,
477                        } => {
478                            tool_calls.push(serde_json::json!({
479                                "id": id,
480                                "type": "function",
481                                "function": {"name": name, "arguments": arguments.to_string()},
482                            }));
483                        }
484                        _ => {}
485                    }
486                }
487
488                let mut msg_obj = serde_json::json!({"role": "assistant"});
489                if !parts.is_empty() {
490                    msg_obj["content"] = serde_json::json!(parts);
491                }
492                if !tool_calls.is_empty() {
493                    msg_obj["tool_calls"] = serde_json::json!(tool_calls);
494                }
495                messages.push(msg_obj);
496            }
497            Message::ToolResult {
498                tool_call_id,
499                tool_name,
500                content,
501                ..
502            } => {
503                let content_val = if content.iter().any(|c| matches!(c, Content::Image { .. })) {
504                    // Images present: use array format for multimodal tool results
505                    content_to_openai(content)
506                } else {
507                    // Text-only: use plain string for maximum compat
508                    let text = content
509                        .iter()
510                        .find_map(|c| match c {
511                            Content::Text { text } => Some(text.clone()),
512                            _ => None,
513                        })
514                        .unwrap_or_default();
515                    serde_json::json!(text)
516                };
517
518                let mut msg_obj = serde_json::json!({
519                    "role": "tool",
520                    "tool_call_id": tool_call_id,
521                    "content": content_val,
522                });
523                if compat.requires_tool_result_name {
524                    msg_obj["name"] = serde_json::json!(tool_name);
525                }
526                messages.push(msg_obj);
527            }
528        }
529    }
530
531    let max_tokens_val = config.max_tokens.unwrap_or(model_config.max_tokens);
532    let mut body = serde_json::json!({
533        "model": config.model_config.id,
534        "stream": true,
535        "stream_options": {"include_usage": true},
536        "messages": messages,
537    });
538
539    match compat.max_tokens_field {
540        MaxTokensField::MaxCompletionTokens => {
541            body["max_completion_tokens"] = serde_json::json!(max_tokens_val);
542        }
543        MaxTokensField::MaxTokens => {
544            body["max_tokens"] = serde_json::json!(max_tokens_val);
545        }
546    }
547
548    if !config.tools.is_empty() {
549        let tools: Vec<serde_json::Value> = config
550            .tools
551            .iter()
552            .map(|t| {
553                serde_json::json!({
554                    "type": "function",
555                    "function": {
556                        "name": t.name,
557                        "description": t.description,
558                        "parameters": t.parameters,
559                    }
560                })
561            })
562            .collect();
563        body["tools"] = serde_json::json!(tools);
564    }
565
566    if config.thinking_level != ThinkingLevel::Off && compat.supports_reasoning_effort {
567        let effort = match config.thinking_level {
568            ThinkingLevel::Minimal | ThinkingLevel::Low => "low",
569            ThinkingLevel::Medium => "medium",
570            ThinkingLevel::High => "high",
571            ThinkingLevel::Off => unreachable!(),
572        };
573        body["reasoning_effort"] = serde_json::json!(effort);
574    }
575
576    if let Some(temp) = config.temperature {
577        body["temperature"] = serde_json::json!(temp);
578    }
579
580    // Structured-output wiring. OpenAI Chat Completions accepts a top-level
581    // `response_format` field with two shapes:
582    //   { "type": "json_object" }                  — free-form JSON
583    //   { "type": "json_schema", "json_schema": {...} } — strict schema
584    // Most non-OpenAI compat providers either honour the same key or ignore it
585    // gracefully — see the capability matrix in docs.
586    match &config.response_format {
587        ResponseFormat::Text => {} // default; omit the field
588        ResponseFormat::JsonObject => {
589            body["response_format"] = serde_json::json!({"type": "json_object"});
590        }
591        ResponseFormat::JsonSchema {
592            schema,
593            name,
594            strict,
595        } => {
596            body["response_format"] = serde_json::json!({
597                "type": "json_schema",
598                "json_schema": {
599                    "name": name,
600                    "schema": schema,
601                    "strict": *strict,
602                },
603            });
604        }
605    }
606
607    body
608}
609
610/// Convert our `Content` blocks to OpenAI's message content format.
611/*
612ARCHITECTURE: content_to_openai — two output shapes for maximum compat
613
614OpenAI supports two shapes for message content:
615  1. A plain string:  "Hello world"       — for text-only, single-block messages
616  2. A parts array:  [{"type":"text",...}] — for multi-block or image-containing messages
617
618Using a plain string where possible maximizes compatibility with older providers
619and reduces JSON payload size. The array format is required for images.
620
621RUST QUIRK: early return via `return`
622  `return serde_json::json!(text)` — exits the function early with a plain string.
623  After the `if` block, we fall through to build the array format.
624  Python analogy: `return text` for the early-exit case.
625*/
626fn content_to_openai(
627    content: &[Content], // SOURCE — slice of Content variants to convert; single Text → plain string, multiple → array
628) -> serde_json::Value {
629    // either a plain JSON string (1 text block) or an array of {type,text/image_url} objects
630    // Optimization: single text block → plain string (maximum provider compatibility)
631    if content.len() == 1 {
632        if let Content::Text { text } = &content[0] {
633            return serde_json::json!(text);
634        }
635    }
636    let parts: Vec<serde_json::Value> = content
637        .iter()
638        .filter_map(|c| match c {
639            Content::Text { text } => Some(serde_json::json!({"type": "text", "text": text})),
640            Content::Image { data, mime_type } => Some(serde_json::json!({
641                "type": "image_url",
642                "image_url": {"url": format!("data:{};base64,{}", mime_type, data)},
643            })),
644            _ => None,
645        })
646        .collect();
647    serde_json::json!(parts)
648}
649
650// ---------------------------------------------------------------------------
651// OpenAI streaming response deserialization types (private to this module)
652// ---------------------------------------------------------------------------
653/*
654ARCHITECTURE: Private deserialization types — mirroring OpenAI's streaming JSON
655
656These structs mirror the shape of OpenAI's streaming response chunks.
657A streaming chunk looks like:
658  {
659    "choices": [{
660      "delta": {
661        "content": "Hello ",
662        "tool_calls": [{"index": 0, "id": "call_abc", "function": {"name": "bash", "arguments": "{"}}]
663      },
664      "finish_reason": null
665    }],
666    "usage": null   // only populated in the last chunk (if stream_options.include_usage = true)
667  }
668
669Multiple optional fields are marked `#[serde(default)]` so absent fields
670deserialize to `None` (for `Option<T>`) or `0` (for `u64`) without error.
671
672RUST QUIRK: `#[derive(Deserialize, Default)]` on `OpenAiDelta`
673  `Default` is needed because `OpenAiChoice.delta` is not `Option<OpenAiDelta>` —
674  it's always present in the JSON but may have all-None fields. The `#[derive(Default)]`
675  provides the "all fields are None" value for `unwrap_or_default()` call sites.
676*/
677
678#[derive(Deserialize)]
679struct OpenAiChunk {
680    #[serde(default)]
681    choices: Vec<OpenAiChoice>,
682    #[serde(default)]
683    usage: Option<OpenAiUsage>,
684}
685
686#[derive(Deserialize)]
687struct OpenAiChoice {
688    delta: OpenAiDelta,
689    #[serde(default)]
690    finish_reason: Option<String>,
691}
692
693/// A single entry in OpenRouter's `reasoning_details` array.
694#[derive(Deserialize)]
695struct OpenRouterReasoningDetail {
696    #[serde(rename = "type")]
697    detail_type: String,
698    #[serde(default)]
699    text: Option<String>,
700}
701
702#[derive(Deserialize, Default)]
703struct OpenAiDelta {
704    #[serde(default)]
705    content: Option<String>,
706    #[serde(default)]
707    reasoning_content: Option<String>,
708    #[serde(default)]
709    reasoning: Option<String>,
710    /// OpenRouter extended thinking: array of `{type, text}` objects.
711    #[serde(default)]
712    reasoning_details: Option<Vec<OpenRouterReasoningDetail>>,
713    #[serde(default)]
714    tool_calls: Option<Vec<OpenAiToolCallDelta>>,
715}
716
717#[derive(Deserialize)]
718struct OpenAiToolCallDelta {
719    #[serde(default)]
720    index: u32,
721    #[serde(default)]
722    id: Option<String>,
723    #[serde(default)]
724    function: Option<OpenAiFunctionDelta>,
725}
726
727#[derive(Deserialize)]
728struct OpenAiFunctionDelta {
729    #[serde(default)]
730    name: Option<String>,
731    #[serde(default)]
732    arguments: Option<String>,
733}
734
735#[derive(Deserialize)]
736struct OpenAiUsage {
737    #[serde(default)]
738    prompt_tokens: u64,
739    #[serde(default)]
740    completion_tokens: u64,
741    #[serde(default)]
742    total_tokens: u64,
743    #[serde(default)]
744    prompt_tokens_details: Option<OpenAiPromptTokensDetails>,
745    #[serde(default)]
746    completion_tokens_details: Option<OpenAiCompletionTokensDetails>,
747}
748
749#[derive(Deserialize)]
750struct OpenAiPromptTokensDetails {
751    #[serde(default)]
752    cached_tokens: u64,
753}
754
755#[derive(Deserialize)]
756struct OpenAiCompletionTokensDetails {
757    #[serde(default)]
758    reasoning_tokens: u64,
759}
760
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provider::model::ModelConfig;

    /// The model configuration every request-body test runs against.
    fn gpt4o() -> ModelConfig {
        ModelConfig::openai("gpt-4o", "GPT-4o", "test")
    }

    /// Builds the `StreamConfig` baseline shared by the request-body tests:
    /// empty system prompt, no tools, thinking off, no token or temperature
    /// override, default cache config, plain-text response format.
    /// Individual tests overwrite the fields they care about.
    fn baseline_config(model_config: &ModelConfig, messages: Vec<Message>) -> StreamConfig {
        StreamConfig {
            model_config: model_config.clone(),
            system_prompt: String::new(),
            messages,
            tools: vec![],
            thinking_level: ThinkingLevel::Off,
            max_tokens: None,
            temperature: None,
            cache_config: CacheConfig::default(),
            response_format: ResponseFormat::Text,
        }
    }

    /// A plain request: model id, streaming flag, role names, and token-limit field.
    #[test]
    fn test_build_request_body_basic() {
        let model_config = gpt4o();
        let mut config = baseline_config(&model_config, vec![Message::user("Hello")]);
        config.system_prompt = "You are helpful.".into();

        let body = build_request_body(&config, &model_config, &OpenAiCompat::openai());

        assert_eq!(body["model"], "gpt-4o");
        assert!(body["stream"].as_bool().unwrap());
        // The OpenAI compat flags select the "developer" role for the system prompt.
        assert_eq!(body["messages"][0]["role"], "developer");
        assert_eq!(body["messages"][1]["role"], "user");
        // ...and `max_completion_tokens` as the token-limit key.
        assert!(body["max_completion_tokens"].is_number());
    }

    /// Tool definitions and the temperature override reach the request body.
    #[test]
    fn test_build_request_body_with_tools() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();
        let mut config = baseline_config(&model_config, vec![Message::user("List files")]);
        config.tools = vec![ToolDefinition {
            name: "bash".into(),
            description: "Run a command".into(),
            parameters: serde_json::json!({"type": "object"}),
        }];
        config.max_tokens = Some(1024);
        config.temperature = Some(0.5);

        let body = build_request_body(&config, &model_config, &compat);

        assert!(body["tools"].is_array());
        assert_eq!(body["tools"][0]["function"]["name"], "bash");
        assert_eq!(body["temperature"], 0.5);
    }

    /// A lone text block collapses to a bare JSON string.
    #[test]
    fn test_content_to_openai_simple_text() {
        let blocks = vec![Content::Text {
            text: "hello".into(),
        }];

        let converted = content_to_openai(&blocks);

        assert_eq!(converted, "hello");
    }

    /// Text plus image yields a parts array that keeps the input order.
    #[test]
    fn test_content_to_openai_multipart() {
        let blocks = vec![
            Content::Text {
                text: "look at this".into(),
            },
            Content::Image {
                data: "abc".into(),
                mime_type: "image/png".into(),
            },
        ];

        let converted = content_to_openai(&blocks);

        assert!(converted.is_array());
        assert_eq!(converted[0]["type"], "text");
        assert_eq!(converted[1]["type"], "image_url");
    }

    /// An image-only tool result is sent as a parts array holding a `data:` URL.
    #[test]
    fn test_tool_result_with_image() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();
        let assistant_call = Message::Assistant {
            content: vec![Content::ToolCall {
                id: "call-1".into(),
                name: "read_file".into(),
                arguments: serde_json::json!({"path": "img.png"}),
            }],
            stop_reason: StopReason::ToolUse,
            model: "test".into(),
            provider: "test".into(),
            usage: Usage::default(),
            timestamp: 0,
            error_message: None,
        };
        let image_result = Message::ToolResult {
            tool_call_id: "call-1".into(),
            tool_name: "read_file".into(),
            content: vec![Content::Image {
                data: "aW1hZ2VkYXRh".into(),
                mime_type: "image/png".into(),
            }],
            is_error: false,
            timestamp: 0,
        };
        let config = baseline_config(&model_config, vec![assistant_call, image_result]);

        let body = build_request_body(&config, &model_config, &compat);

        // The tool result is the final entry of the serialized `messages` array.
        let msgs = body["messages"].as_array().unwrap();
        let tool_msg = msgs.last().unwrap();
        assert_eq!(tool_msg["role"], "tool");
        // Its content must be an array whose first part is the image.
        let parts = tool_msg["content"].as_array().unwrap();
        assert_eq!(parts[0]["type"], "image_url");
        let url = parts[0]["image_url"]["url"].as_str().unwrap();
        assert!(url.starts_with("data:image/png;base64,"));
    }

    /// A text-only tool result keeps the compact plain-string form.
    #[test]
    fn test_tool_result_text_only_uses_string() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();
        let text_result = Message::ToolResult {
            tool_call_id: "call-1".into(),
            tool_name: "bash".into(),
            content: vec![Content::Text {
                text: "hello".into(),
            }],
            is_error: false,
            timestamp: 0,
        };
        let config = baseline_config(&model_config, vec![text_result]);

        let body = build_request_body(&config, &model_config, &compat);

        let msgs = body["messages"].as_array().unwrap();
        let tool_msg = msgs.last().unwrap();
        // Text-only: content should be a plain string, not a parts array.
        assert_eq!(tool_msg["content"], "hello");
    }
}