phi_core/provider/openai_compat.rs
1//! OpenAI Chat Completions compatible provider.
2//!
3//! One implementation covers OpenAI, xAI, Groq, Cerebras, OpenRouter,
4//! Mistral, DeepSeek, MiniMax, HuggingFace, Kimi, and any other provider
5//! that implements the OpenAI Chat Completions API.
6//!
7//! Behavioral differences are handled via `OpenAiCompat` flags in ModelConfig.
8/*
9ARCHITECTURE: OpenAiCompatProvider — one implementation, 15+ providers
10
11The OpenAI Chat Completions API is a de facto industry standard. Rather than
12writing separate providers for OpenAI, Groq, DeepSeek, etc., we write ONE provider
13that reads `OpenAiCompat` flags from `ModelConfig` to handle per-provider quirks:
14
15 - `supports_developer_role` → use "developer" instead of "system" role
16 - `max_tokens_field` → use "max_completion_tokens" vs "max_tokens"
17 - `supports_reasoning_effort` → send `reasoning_effort: "high/medium/low"`
18 - `requires_tool_result_name` → include `name` field in tool results
 - `thinking_format` → parse reasoning from `reasoning`, `reasoning_details`, or `reasoning_content`
20
21Adding a new OpenAI-compatible provider requires ONLY:
22 1. A new `OpenAiCompat::new_provider()` factory in model.rs
23 2. A `ModelConfig::new_provider(id, name)` factory in model.rs
24 No new provider files needed.
25
26ARCHITECTURE: Tool call buffering vs Anthropic's content_block approach
27
28OpenAI and Anthropic stream tool calls differently:
29 Anthropic: explicit start/delta/stop events with `content_index`
 OpenAI: tool calls appear as `delta.tool_calls[N]` fragments across chunks;
 each chunk may carry several tool call deltas, and each delta's `index`
 identifies which tool call that fragment belongs to
32
33We buffer OpenAI tool calls in `ToolCallBuffer` (scratch pads), then push them
34into `content` as complete `Content::ToolCall` values at stream end.
35This two-phase approach avoids partial JSON being visible to the agent loop.
36
37ARCHITECTURE: `[DONE]` sentinel
38
39OpenAI streams end with a special SSE data payload `[DONE]` (not valid JSON).
40The provider explicitly checks for this and breaks the loop rather than
41trying to parse it as JSON.
42*/
43
44use super::model::{MaxTokensField, ModelConfig, OpenAiCompat, ThinkingFormat};
45use super::traits::*;
46use crate::types::*;
47use async_trait::async_trait;
48use futures::StreamExt;
49use reqwest_eventsource::EventSource;
50use serde::Deserialize;
51use tokio::sync::mpsc;
52use tracing::{debug, warn};
53
/// Stateless provider for any endpoint that speaks the OpenAI Chat Completions API.
///
/// This is a unit struct: it holds no data, and all behaviour lives in its
/// `StreamProvider` implementation. The derives make the handle printable in
/// diagnostics, freely copyable, and constructible via `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct OpenAiCompatProvider;
56
57#[async_trait]
58impl StreamProvider for OpenAiCompatProvider {
59 fn provider_id(&self) -> &str {
60 "openai"
61 }
62
63 async fn stream(
64 &self,
65 config: StreamConfig, // REQUEST — model_config.base_url determines which of 15+ providers to hit
66 tx: mpsc::UnboundedSender<StreamEvent>, // OBSERVER — receives SSE events ([DONE] terminates the stream)
67 cancel: tokio_util::sync::CancellationToken, // ABORT — races against SSE stream
68 ) -> Result<Message, ProviderError> {
69 let model_config = &config.model_config;
70 /*
71 RUST QUIRK: `.as_ref().cloned().unwrap_or_default()`
72 `model_config.compat` is `Option<OpenAiCompat>`.
73 `.as_ref()` → `Option<&OpenAiCompat>` (avoid moving out of model_config)
74 `.cloned()` → `Option<OpenAiCompat>` (clone the inner value if Some)
75 `.unwrap_or_default()` → `OpenAiCompat` (use `Default::default()` if None)
76 Result: an owned `OpenAiCompat` with appropriate flags for this provider,
77 defaulting to conservative "generic OpenAI-compat" if compat wasn't specified.
78 */
79 let compat = model_config.compat.as_ref().cloned().unwrap_or_default();
80 // Resolve via CredentialProvider when set, else use the static `api_key`.
81 let api_key = model_config.resolve_api_key().await?;
82
83 let base_url = &model_config.base_url;
84 // Append the endpoint path — base_url is like "https://api.openai.com/v1" (no trailing slash)
85 let url = format!("{}/chat/completions", base_url);
86
87 let body = build_request_body(&config, model_config, &compat);
88 debug!(
89 "OpenAI compat request: model={} url={}",
90 config.model_config.id, url
91 );
92
93 let client = reqwest::Client::new();
94 let mut request = client
95 .post(&url)
96 .header("content-type", "application/json")
97 .header("authorization", format!("Bearer {}", api_key));
98
99 // Add any extra headers from model config
100 for (k, v) in &model_config.headers {
101 request = request.header(k, v);
102 }
103
104 let request = request.json(&body);
105
106 let mut es =
107 EventSource::new(request).map_err(|e| ProviderError::Network(e.to_string()))?;
108
109 /*
110 ARCHITECTURE: Streaming accumulators
111
112 Unlike Anthropic (which has explicit content_block_start events), OpenAI
113 "discovers" content blocks dynamically as deltas arrive:
114 - First delta with `content: Some(text)` → create a text block
115 - First delta with `reasoning_content` → create a thinking block
116 - First delta with `tool_calls[N]` → create buffer N in tool_call_buffers
117
118 `tool_call_buffers` accumulates partial tool call JSON (id/name/arguments fragments)
119 until the stream ends, then we convert them to `Content::ToolCall` values.
120 */
121 let mut content: Vec<Content> = Vec::new();
122 let mut usage = Usage::default();
123 let mut stop_reason = StopReason::Stop;
124 let mut tool_call_buffers: Vec<ToolCallBuffer> = Vec::new(); // scratch pads for partial tool calls
125
126 let _ = tx.send(StreamEvent::Start);
127
128 loop {
129 tokio::select! {
130 _ = cancel.cancelled() => {
131 es.close();
132 return Err(ProviderError::Cancelled);
133 }
134 event = es.next() => {
135 match event {
136 None => break,
137 Some(Ok(reqwest_eventsource::Event::Open)) => {}
138 Some(Ok(reqwest_eventsource::Event::Message(msg))) => {
139 // OpenAI signals stream end with "[DONE]" (not valid JSON — check first)
140 if msg.data == "[DONE]" {
141 break;
142 }
143
144 /*
145 RUST QUIRK: `match serde_json::from_str(...) { Ok(c) => c, Err => continue }`
146
147 This is a `match` used as an expression that short-circuits on error.
148 `continue` jumps back to the top of the `loop` (skipping this chunk).
149 Some providers send non-JSON lines (e.g. comments) in their SSE stream;
150 we log and ignore them rather than failing the whole stream.
151
152 This is more flexible than `?` (which would return from the whole function).
153 */
154 let chunk: OpenAiChunk = match serde_json::from_str(&msg.data) {
155 Ok(c) => c,
156 Err(e) => {
157 debug!("Failed to parse OpenAI chunk: {} data={}", e, &msg.data);
158 continue; // skip this event, keep processing the stream
159 }
160 };
161
162 // Usage appears in some chunks (varies by provider — not always the last)
163 if let Some(u) = &chunk.usage {
164 usage.input = u.prompt_tokens;
165 usage.output = u.completion_tokens;
166 usage.total_tokens = u.total_tokens;
167 if let Some(details) = &u.prompt_tokens_details {
168 usage.cache_read = details.cached_tokens; // prompt cache hits
169 }
170 if let Some(details) = &u.completion_tokens_details {
171 usage.reasoning = details.reasoning_tokens; // o-series reasoning
172 }
173 }
174
175 for choice in &chunk.choices {
176 let delta = &choice.delta;
177
178 /*
179 ARCHITECTURE: Thinking format dispatch
180
181 Different providers use different field names for chain-of-thought:
182 xAI (Grok): `delta.reasoning`
183 OpenRouter: `delta.reasoning_details` (array of {type, text})
184 OpenAI/others: `delta.reasoning_content`
185
186 `ThinkingFormat::Xai` → read from `delta.reasoning`
187 `ThinkingFormat::OpenRouter` → collect `delta.reasoning_details`
188 entries where type == "thinking"
189 all others → read from `delta.reasoning_content`
190
191 RUST QUIRK: `.as_deref()` on `Option<String>` → `Option<&str>`
192 `delta.reasoning` is `Option<String>`.
193 `.as_deref()` borrows the inner String as `&str`.
194 Result: `Option<&str>` — `None` if the field was absent,
195 `Some("thinking text")` if it had content.
196 */
197 // Owned string is needed for OpenRouter (assembled from array);
198 // other formats borrow directly from delta fields.
199 // `reasoning_owned` anchors the String so `reasoning` (&str) can borrow it.
200 let reasoning_owned = match compat.thinking_format {
201 ThinkingFormat::OpenRouter => {
202 delta.reasoning_details.as_ref().map(|details| {
203 details
204 .iter()
205 .filter(|d| d.detail_type == "thinking")
206 .filter_map(|d| d.text.as_deref())
207 .collect::<String>()
208 })
209 }
210 _ => None,
211 };
212 let reasoning = match compat.thinking_format {
213 ThinkingFormat::Xai => delta.reasoning.as_deref(),
214 ThinkingFormat::OpenRouter => reasoning_owned.as_deref(),
215 _ => delta.reasoning_content.as_deref(),
216 };
217 if let Some(reasoning_text) = reasoning {
218 // Find existing thinking block or create a new one
219 let thinking_idx = content.iter().position(|c| matches!(c, Content::Thinking { .. }));
220 let idx = match thinking_idx {
221 Some(i) => i,
222 None => {
223 content.push(Content::Thinking { thinking: String::new(), signature: None });
224 content.len() - 1
225 }
226 };
227 if let Some(Content::Thinking { thinking, .. }) = content.get_mut(idx) {
228 thinking.push_str(reasoning_text);
229 }
230 let _ = tx.send(StreamEvent::ThinkingDelta {
231 content_index: idx,
232 delta: reasoning_text.to_string(),
233 });
234 }
235
236 // Text content — find or create a text block
237 if let Some(text) = &delta.content {
238 let text_idx = content.iter().position(|c| matches!(c, Content::Text { .. }));
239 let idx = match text_idx {
240 Some(i) => i,
241 None => {
242 content.push(Content::Text { text: String::new() });
243 content.len() - 1
244 }
245 };
246 if let Some(Content::Text { text: t }) = content.get_mut(idx) {
247 t.push_str(text);
248 }
249 let _ = tx.send(StreamEvent::TextDelta {
250 content_index: idx,
251 delta: text.clone(),
252 });
253 }
254
255 /*
256 ARCHITECTURE: Tool call buffering
257
258 OpenAI streams tool calls as partial JSON fragments across
259 multiple chunks. Each `tc` (tool call delta) has:
260 `tc.index` — which parallel tool call this belongs to
261 `tc.id` — call ID (only in the first delta for this index)
262 `tc.function.name` — only in first delta
263 `tc.function.arguments` — partial JSON, streamed across many chunks
264
265 We maintain `tool_call_buffers[tc_index]` as scratch pads.
266 `while tool_call_buffers.len() <= tc_index { push empty buffer }`
267 — ensures the buffer exists before indexing.
268
269 RUST QUIRK: `buf.name.clone_from(name)` vs `buf.name = name.clone()`
270 `.clone_from(&src)` reuses the existing String allocation if possible
271 (it calls `String::replace_range` internally). Slightly more efficient
272 than `.clone()` when the target already has capacity.
273 */
274 if let Some(tool_calls) = &delta.tool_calls {
275 for tc in tool_calls {
276 let tc_index = tc.index as usize;
277 while tool_call_buffers.len() <= tc_index {
278 tool_call_buffers.push(ToolCallBuffer::default());
279 }
280 let buf = &mut tool_call_buffers[tc_index];
281 if let Some(id) = &tc.id {
282 buf.id = id.clone();
283 }
284 if let Some(f) = &tc.function {
285 if let Some(name) = &f.name {
286 buf.name.clone_from(name);
287 let _ = tx.send(StreamEvent::ToolCallStart {
288 content_index: content.len() + tc_index,
289 id: buf.id.clone(),
290 name: name.clone(),
291 });
292 }
293 if let Some(args) = &f.arguments {
294 buf.arguments.push_str(args);
295 let _ = tx.send(StreamEvent::ToolCallDelta {
296 content_index: content.len() + tc_index,
297 delta: args.clone(),
298 });
299 }
300 }
301 }
302 }
303
304 // `finish_reason` signals why the response stopped generating
305 if let Some(reason) = &choice.finish_reason {
306 stop_reason = match reason.as_str() {
307 "stop" => StopReason::Stop,
308 "length" => StopReason::Length,
309 "tool_calls" => StopReason::ToolUse,
310 _ => StopReason::Stop,
311 };
312 }
313 }
314 }
315 Some(Err(e)) => {
316 let err_str = e.to_string();
317 warn!("OpenAI SSE error: {}", err_str);
318
319 // Classify retryable HTTP errors so the retry loop can handle them.
320 // reqwest-eventsource renders InvalidStatusCode as
321 // "Invalid status code: {code} {reason}" (e.g. "Invalid status code: 429 Too Many Requests").
322 // We match on "status code: NNN" for precision, with broader fallbacks
323 // for error messages from other sources (e.g. provider JSON error bodies).
324 let err_lower = err_str.to_lowercase();
325 if err_lower.contains("status code: 429")
326 || err_lower.contains("rate limit")
327 || err_lower.contains("rate-limit")
328 {
329 return Err(ProviderError::RateLimited { retry_after_ms: None });
330 }
331 if err_lower.contains("status code: 502")
332 || err_lower.contains("status code: 503")
333 || err_lower.contains("status code: 504")
334 {
335 return Err(ProviderError::Network(err_str));
336 }
337
338 let err_msg = Message::Assistant {
339 content: vec![Content::Text { text: String::new() }],
340 stop_reason: StopReason::Error,
341 model: config.model_config.id.clone(),
342 provider: model_config.provider.clone(),
343 usage: usage.clone(),
344 timestamp: now_ms(),
345 error_message: Some(err_str),
346 };
347 let _ = tx.send(StreamEvent::Error { message: err_msg.clone() });
348 return Ok(err_msg);
349 }
350 }
351 }
352 }
353 }
354
355 /*
356 ARCHITECTURE: Finalizing tool calls after stream end
357
358 Only after the stream ends do we have complete JSON for each tool call.
359 We parse `buf.arguments` (the accumulated raw JSON string) and push a
360 `Content::ToolCall` for each buffer.
361
362 RUST QUIRK: `.unwrap_or(serde_json::Value::Object(Default::default()))`
363 If `serde_json::from_str` fails (malformed JSON), we fall back to an
364 empty JSON object `{}`. This is defensive: the agent loop should receive
365 a ToolCall even if it has empty arguments, so it can report the parsing
366 failure as a tool execution error rather than crashing the whole stream.
367 `Default::default()` for `serde_json::Map<...>` is an empty map — so
368 `serde_json::Value::Object(Default::default())` builds `{}`.
369 */
370 for buf in &tool_call_buffers {
371 let args = serde_json::from_str(&buf.arguments)
372 .unwrap_or(serde_json::Value::Object(Default::default()));
373 content.push(Content::ToolCall {
374 id: buf.id.clone(),
375 name: buf.name.clone(),
376 arguments: args,
377 });
378 let _ = tx.send(StreamEvent::ToolCallEnd {
379 content_index: content.len() - 1,
380 });
381 }
382
383 if !tool_call_buffers.is_empty() {
384 stop_reason = StopReason::ToolUse;
385 }
386
387 let message = Message::Assistant {
388 content,
389 stop_reason,
390 model: config.model_config.id.clone(),
391 provider: model_config.provider.clone(),
392 usage,
393 timestamp: now_ms(),
394 error_message: None,
395 };
396
397 let _ = tx.send(StreamEvent::Done {
398 message: message.clone(),
399 });
400 Ok(message)
401 }
402}
403
/// Scratch pad for accumulating a single streaming tool call across many SSE chunks.
///
/// `#[derive(Default)]` gives every `String` field the empty string, so
/// `ToolCallBuffer::default()` is `{ id: "", name: "", arguments: "" }`. That lets
/// the stream loop create new scratch pads without spelling out the fields.
#[derive(Default)]
struct ToolCallBuffer {
    id: String,        // tool call ID, overwritten whenever a delta carries one
    name: String,      // function name, overwritten whenever a delta carries one
    arguments: String, // JSON arguments, appended to fragment by fragment
}
418
419/// Builds the JSON request body for the OpenAI Chat Completions API.
420/*
421ARCHITECTURE: build_request_body — translation layer (yo-core types → OpenAI JSON)
422
423Converts our internal `StreamConfig` into the OpenAI-compatible JSON body.
424The `compat` flags control which variant of the API to use:
425 - System/developer role
426 - `max_tokens` vs `max_completion_tokens`
427 - `reasoning_effort` parameter for thinking-capable models
428 - Tool result `name` field (required by some providers)
429 - Image format (inline base64 vs URL reference)
430
431RUST QUIRK: `let role = if ... { "developer" } else { "system" }` — if as an expression
432 Unlike Python/Java where `if` is a statement, Rust's `if` is an EXPRESSION — it
433 evaluates to a value. Both branches must have the same type (here: `&str`).
434 Python analogy: `role = "developer" if compat.supports_developer_role else "system"`
435*/
436fn build_request_body(
437 config: &StreamConfig, // REQUEST — messages, tools, model, system prompt, cache config
438 model_config: &ModelConfig, // ROUTING — carries base_url (which provider) and api_key
439 compat: &OpenAiCompat, // QUIRK FLAGS — per-provider behavior switches (store, dev role, reasoning format, etc.)
440) -> serde_json::Value {
441 let mut messages: Vec<serde_json::Value> = Vec::new();
442
443 // System prompt — role depends on whether this provider uses "developer" vs "system"
444 if !config.system_prompt.is_empty() {
445 let role = if compat.supports_developer_role {
446 "developer" // OpenAI o-series models use "developer" for system-level instructions
447 } else {
448 "system" // Standard role for most other providers
449 };
450 messages.push(serde_json::json!({
451 "role": role,
452 "content": config.system_prompt,
453 }));
454 }
455
456 for msg in &config.messages {
457 match msg {
458 Message::User { content, .. } => {
459 messages.push(serde_json::json!({
460 "role": "user",
461 "content": content_to_openai(content),
462 }));
463 }
464 Message::Assistant { content, .. } => {
465 let mut parts: Vec<serde_json::Value> = Vec::new();
466 let mut tool_calls: Vec<serde_json::Value> = Vec::new();
467
468 for c in content {
469 match c {
470 Content::Text { text } => {
471 parts.push(serde_json::json!({"type": "text", "text": text}));
472 }
473 Content::ToolCall {
474 id,
475 name,
476 arguments,
477 } => {
478 tool_calls.push(serde_json::json!({
479 "id": id,
480 "type": "function",
481 "function": {"name": name, "arguments": arguments.to_string()},
482 }));
483 }
484 _ => {}
485 }
486 }
487
488 let mut msg_obj = serde_json::json!({"role": "assistant"});
489 if !parts.is_empty() {
490 msg_obj["content"] = serde_json::json!(parts);
491 }
492 if !tool_calls.is_empty() {
493 msg_obj["tool_calls"] = serde_json::json!(tool_calls);
494 }
495 messages.push(msg_obj);
496 }
497 Message::ToolResult {
498 tool_call_id,
499 tool_name,
500 content,
501 ..
502 } => {
503 let content_val = if content.iter().any(|c| matches!(c, Content::Image { .. })) {
504 // Images present: use array format for multimodal tool results
505 content_to_openai(content)
506 } else {
507 // Text-only: use plain string for maximum compat
508 let text = content
509 .iter()
510 .find_map(|c| match c {
511 Content::Text { text } => Some(text.clone()),
512 _ => None,
513 })
514 .unwrap_or_default();
515 serde_json::json!(text)
516 };
517
518 let mut msg_obj = serde_json::json!({
519 "role": "tool",
520 "tool_call_id": tool_call_id,
521 "content": content_val,
522 });
523 if compat.requires_tool_result_name {
524 msg_obj["name"] = serde_json::json!(tool_name);
525 }
526 messages.push(msg_obj);
527 }
528 }
529 }
530
531 let max_tokens_val = config.max_tokens.unwrap_or(model_config.max_tokens);
532 let mut body = serde_json::json!({
533 "model": config.model_config.id,
534 "stream": true,
535 "stream_options": {"include_usage": true},
536 "messages": messages,
537 });
538
539 match compat.max_tokens_field {
540 MaxTokensField::MaxCompletionTokens => {
541 body["max_completion_tokens"] = serde_json::json!(max_tokens_val);
542 }
543 MaxTokensField::MaxTokens => {
544 body["max_tokens"] = serde_json::json!(max_tokens_val);
545 }
546 }
547
548 if !config.tools.is_empty() {
549 let tools: Vec<serde_json::Value> = config
550 .tools
551 .iter()
552 .map(|t| {
553 serde_json::json!({
554 "type": "function",
555 "function": {
556 "name": t.name,
557 "description": t.description,
558 "parameters": t.parameters,
559 }
560 })
561 })
562 .collect();
563 body["tools"] = serde_json::json!(tools);
564 }
565
566 if config.thinking_level != ThinkingLevel::Off && compat.supports_reasoning_effort {
567 let effort = match config.thinking_level {
568 ThinkingLevel::Minimal | ThinkingLevel::Low => "low",
569 ThinkingLevel::Medium => "medium",
570 ThinkingLevel::High => "high",
571 ThinkingLevel::Off => unreachable!(),
572 };
573 body["reasoning_effort"] = serde_json::json!(effort);
574 }
575
576 if let Some(temp) = config.temperature {
577 body["temperature"] = serde_json::json!(temp);
578 }
579
580 // Structured-output wiring. OpenAI Chat Completions accepts a top-level
581 // `response_format` field with two shapes:
582 // { "type": "json_object" } — free-form JSON
583 // { "type": "json_schema", "json_schema": {...} } — strict schema
584 // Most non-OpenAI compat providers either honour the same key or ignore it
585 // gracefully — see the capability matrix in docs.
586 match &config.response_format {
587 ResponseFormat::Text => {} // default; omit the field
588 ResponseFormat::JsonObject => {
589 body["response_format"] = serde_json::json!({"type": "json_object"});
590 }
591 ResponseFormat::JsonSchema {
592 schema,
593 name,
594 strict,
595 } => {
596 body["response_format"] = serde_json::json!({
597 "type": "json_schema",
598 "json_schema": {
599 "name": name,
600 "schema": schema,
601 "strict": *strict,
602 },
603 });
604 }
605 }
606
607 body
608}
609
610/// Convert our `Content` blocks to OpenAI's message content format.
611/*
612ARCHITECTURE: content_to_openai — two output shapes for maximum compat
613
614OpenAI supports two shapes for message content:
615 1. A plain string: "Hello world" — for text-only, single-block messages
616 2. A parts array: [{"type":"text",...}] — for multi-block or image-containing messages
617
618Using a plain string where possible maximizes compatibility with older providers
619and reduces JSON payload size. The array format is required for images.
620
621RUST QUIRK: early return via `return`
622 `return serde_json::json!(text)` — exits the function early with a plain string.
623 After the `if` block, we fall through to build the array format.
624 Python analogy: `return text` for the early-exit case.
625*/
626fn content_to_openai(
627 content: &[Content], // SOURCE — slice of Content variants to convert; single Text → plain string, multiple → array
628) -> serde_json::Value {
629 // either a plain JSON string (1 text block) or an array of {type,text/image_url} objects
630 // Optimization: single text block → plain string (maximum provider compatibility)
631 if content.len() == 1 {
632 if let Content::Text { text } = &content[0] {
633 return serde_json::json!(text);
634 }
635 }
636 let parts: Vec<serde_json::Value> = content
637 .iter()
638 .filter_map(|c| match c {
639 Content::Text { text } => Some(serde_json::json!({"type": "text", "text": text})),
640 Content::Image { data, mime_type } => Some(serde_json::json!({
641 "type": "image_url",
642 "image_url": {"url": format!("data:{};base64,{}", mime_type, data)},
643 })),
644 _ => None,
645 })
646 .collect();
647 serde_json::json!(parts)
648}
649
650// ---------------------------------------------------------------------------
651// OpenAI streaming response deserialization types (private to this module)
652// ---------------------------------------------------------------------------
653/*
654ARCHITECTURE: Private deserialization types — mirroring OpenAI's streaming JSON
655
656These structs mirror the shape of OpenAI's streaming response chunks.
657A streaming chunk looks like:
658 {
659 "choices": [{
660 "delta": {
661 "content": "Hello ",
662 "tool_calls": [{"index": 0, "id": "call_abc", "function": {"name": "bash", "arguments": "{"}}]
663 },
664 "finish_reason": null
665 }],
 "usage": null // populated only when stream_options.include_usage = true; typically in the final chunk, though this varies by provider
667 }
668
669Multiple optional fields are marked `#[serde(default)]` so absent fields
670deserialize to `None` (for `Option<T>`) or `0` (for `u64`) without error.
671
RUST QUIRK: `#[derive(Deserialize, Default)]` on `OpenAiDelta`
 `OpenAiChoice.delta` is an `OpenAiDelta`, not an `Option<OpenAiDelta>`, so a delta
 with nothing in it is represented by a value whose fields are all `None`.
 `#[derive(Default)]` produces exactly that value via `OpenAiDelta::default()`.
676*/
677
/// One decoded SSE `data:` payload from the streaming endpoint.
#[derive(Deserialize)]
struct OpenAiChunk {
    /// Per-choice deltas; an absent field deserializes to an empty list.
    #[serde(default)]
    choices: Vec<OpenAiChoice>,
    /// Token accounting, when this chunk carries it.
    #[serde(default)]
    usage: Option<OpenAiUsage>,
}
685
686#[derive(Deserialize)]
687struct OpenAiChoice {
688 delta: OpenAiDelta,
689 #[serde(default)]
690 finish_reason: Option<String>,
691}
692
/// A single entry in OpenRouter's `reasoning_details` array.
#[derive(Deserialize)]
struct OpenRouterReasoningDetail {
    /// The JSON `type` field; the stream loop keeps only entries equal to "thinking".
    #[serde(rename = "type")]
    detail_type: String,
    /// Reasoning text of this entry, if any.
    #[serde(default)]
    text: Option<String>,
}
701
/// The incremental payload of one choice. Every field is optional; which
/// reasoning field is read depends on the configured `ThinkingFormat`.
#[derive(Deserialize, Default)]
struct OpenAiDelta {
    /// Fragment of the visible response text.
    #[serde(default)]
    content: Option<String>,
    /// Reasoning fragment, read for every format other than xAI and OpenRouter.
    #[serde(default)]
    reasoning_content: Option<String>,
    /// Reasoning fragment, read for `ThinkingFormat::Xai`.
    #[serde(default)]
    reasoning: Option<String>,
    /// OpenRouter extended thinking: array of `{type, text}` objects.
    #[serde(default)]
    reasoning_details: Option<Vec<OpenRouterReasoningDetail>>,
    /// Tool call fragments carried by this delta.
    #[serde(default)]
    tool_calls: Option<Vec<OpenAiToolCallDelta>>,
}
716
/// One fragment of a streamed tool call.
#[derive(Deserialize)]
struct OpenAiToolCallDelta {
    /// Which parallel tool call this fragment belongs to; 0 when absent.
    #[serde(default)]
    index: u32,
    /// Call ID, when this fragment carries one.
    #[serde(default)]
    id: Option<String>,
    /// Function name and/or argument fragment, when present.
    #[serde(default)]
    function: Option<OpenAiFunctionDelta>,
}
726
/// The `function` object inside a tool call fragment.
#[derive(Deserialize)]
struct OpenAiFunctionDelta {
    /// Function name, when this fragment carries it.
    #[serde(default)]
    name: Option<String>,
    /// A piece of the JSON-encoded arguments; pieces are concatenated by the stream loop.
    #[serde(default)]
    arguments: Option<String>,
}
734
/// Token accounting reported by the provider. Absent counters deserialize to 0.
#[derive(Deserialize)]
struct OpenAiUsage {
    /// Copied into `Usage::input`.
    #[serde(default)]
    prompt_tokens: u64,
    /// Copied into `Usage::output`.
    #[serde(default)]
    completion_tokens: u64,
    /// Copied into `Usage::total_tokens`.
    #[serde(default)]
    total_tokens: u64,
    /// Optional breakdown of the prompt tokens (source of `Usage::cache_read`).
    #[serde(default)]
    prompt_tokens_details: Option<OpenAiPromptTokensDetails>,
    /// Optional breakdown of the completion tokens (source of `Usage::reasoning`).
    #[serde(default)]
    completion_tokens_details: Option<OpenAiCompletionTokensDetails>,
}
748
/// Breakdown of prompt tokens inside `OpenAiUsage`.
#[derive(Deserialize)]
struct OpenAiPromptTokensDetails {
    /// Prompt tokens served from cache; 0 when absent. Copied into `Usage::cache_read`.
    #[serde(default)]
    cached_tokens: u64,
}
754
/// Breakdown of completion tokens inside `OpenAiUsage`.
#[derive(Deserialize)]
struct OpenAiCompletionTokensDetails {
    /// Tokens spent on reasoning; 0 when absent. Copied into `Usage::reasoning`.
    #[serde(default)]
    reasoning_tokens: u64,
}
760
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provider::model::ModelConfig;

    /// Builds a `StreamConfig` with thinking off, no tools, no token or
    /// temperature override, default caching and plain-text output. Tests
    /// adjust individual fields afterwards where they need something else.
    fn stream_config(
        model_config: &ModelConfig,
        system_prompt: &str,
        messages: Vec<Message>,
    ) -> StreamConfig {
        StreamConfig {
            model_config: model_config.clone(),
            system_prompt: system_prompt.into(),
            messages,
            tools: vec![],
            thinking_level: ThinkingLevel::Off,
            max_tokens: None,
            temperature: None,
            cache_config: CacheConfig::default(),
            response_format: ResponseFormat::Text,
        }
    }

    /// The model configuration shared by every request-body test.
    fn gpt4o() -> ModelConfig {
        ModelConfig::openai("gpt-4o", "GPT-4o", "test")
    }

    #[test]
    fn test_build_request_body_basic() {
        let model_config = gpt4o();
        let config = stream_config(
            &model_config,
            "You are helpful.",
            vec![Message::user("Hello")],
        );

        let body = build_request_body(&config, &model_config, &OpenAiCompat::openai());
        assert_eq!(body["model"], "gpt-4o");
        assert!(body["stream"].as_bool().unwrap());
        // Developer role for OpenAI
        assert_eq!(body["messages"][0]["role"], "developer");
        assert_eq!(body["messages"][1]["role"], "user");
        // max_completion_tokens for OpenAI
        assert!(body["max_completion_tokens"].is_number());
    }

    #[test]
    fn test_build_request_body_with_tools() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();
        let mut config = stream_config(&model_config, "", vec![Message::user("List files")]);
        config.tools = vec![ToolDefinition {
            name: "bash".into(),
            description: "Run a command".into(),
            parameters: serde_json::json!({"type": "object"}),
        }];
        config.max_tokens = Some(1024);
        config.temperature = Some(0.5);

        let body = build_request_body(&config, &model_config, &compat);
        assert!(body["tools"].is_array());
        assert_eq!(body["tools"][0]["function"]["name"], "bash");
        assert_eq!(body["temperature"], 0.5);
    }

    #[test]
    fn test_content_to_openai_simple_text() {
        let content = vec![Content::Text {
            text: "hello".into(),
        }];
        let result = content_to_openai(&content);
        // A lone text block collapses to a bare string.
        assert_eq!(result, "hello");
    }

    #[test]
    fn test_content_to_openai_multipart() {
        let content = vec![
            Content::Text {
                text: "look at this".into(),
            },
            Content::Image {
                data: "abc".into(),
                mime_type: "image/png".into(),
            },
        ];
        let result = content_to_openai(&content);
        assert!(result.is_array());
        assert_eq!(result[0]["type"], "text");
        assert_eq!(result[1]["type"], "image_url");
    }

    #[test]
    fn test_tool_result_with_image() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();

        let assistant_call = Message::Assistant {
            content: vec![Content::ToolCall {
                id: "call-1".into(),
                name: "read_file".into(),
                arguments: serde_json::json!({"path": "img.png"}),
            }],
            stop_reason: StopReason::ToolUse,
            model: "test".into(),
            provider: "test".into(),
            usage: Usage::default(),
            timestamp: 0,
            error_message: None,
        };
        let image_result = Message::ToolResult {
            tool_call_id: "call-1".into(),
            tool_name: "read_file".into(),
            content: vec![Content::Image {
                data: "aW1hZ2VkYXRh".into(),
                mime_type: "image/png".into(),
            }],
            is_error: false,
            timestamp: 0,
        };
        let config = stream_config(&model_config, "", vec![assistant_call, image_result]);

        let body = build_request_body(&config, &model_config, &compat);
        let msgs = body["messages"].as_array().unwrap();
        // The tool result is the last message; it follows the assistant message
        // (no system message is emitted because the system prompt is empty).
        let tool_msg = msgs.last().unwrap();
        assert_eq!(tool_msg["role"], "tool");
        // content should be an array with image_url
        let content = tool_msg["content"].as_array().unwrap();
        assert_eq!(content[0]["type"], "image_url");
        assert!(content[0]["image_url"]["url"]
            .as_str()
            .unwrap()
            .starts_with("data:image/png;base64,"));
    }

    #[test]
    fn test_tool_result_text_only_uses_string() {
        let model_config = gpt4o();
        let compat = OpenAiCompat::openai();
        let text_result = Message::ToolResult {
            tool_call_id: "call-1".into(),
            tool_name: "bash".into(),
            content: vec![Content::Text {
                text: "hello".into(),
            }],
            is_error: false,
            timestamp: 0,
        };
        let config = stream_config(&model_config, "", vec![text_result]);

        let body = build_request_body(&config, &model_config, &compat);
        let msgs = body["messages"].as_array().unwrap();
        let tool_msg = msgs.last().unwrap();
        // Text-only: content should be a plain string
        assert_eq!(tool_msg["content"], "hello");
    }
}