// rig/providers/openai/completion/mod.rs
// ================================================================
// OpenAI Completion API
// ================================================================
4
use super::{
    client::{ApiErrorResponse, ApiResponse},
    streaming::StreamingCompletionResponse,
};
use crate::completion::{
    CompletionError, CompletionRequest as CoreCompletionRequest, GetTokenUsage,
};
use crate::http_client::{self, HttpClientExt};
use crate::message::{AudioMediaType, DocumentSourceKind, ImageDetail, MimeType};
use crate::one_or_many::string_or_one_or_many;
use crate::telemetry::{ProviderResponseExt, SpanCombinator};
use crate::wasm_compat::{WasmCompatSend, WasmCompatSync};
use crate::{OneOrMany, completion, json_utils, message};
use serde::{Deserialize, Serialize, Serializer};
use std::convert::Infallible;
use std::fmt;
use std::str::FromStr;
use tracing::{Instrument, Level, enabled, info_span};
24
/// Streaming variant of the OpenAI completion API.
pub mod streaming;
26
27/// Serializes user content as a plain string when there's a single text item,
28/// otherwise as an array of content parts.
29fn serialize_user_content<S>(
30    content: &OneOrMany<UserContent>,
31    serializer: S,
32) -> Result<S::Ok, S::Error>
33where
34    S: Serializer,
35{
36    if content.len() == 1
37        && let UserContent::Text { text } = content.first_ref()
38    {
39        return serializer.serialize_str(text);
40    }
41    content.serialize(serializer)
42}
43
/// `gpt-5.5` completion model
pub const GPT_5_5: &str = "gpt-5.5";

/// `gpt-5.2` completion model
pub const GPT_5_2: &str = "gpt-5.2";

/// `gpt-5.1` completion model
pub const GPT_5_1: &str = "gpt-5.1";

/// `gpt-5` completion model
pub const GPT_5: &str = "gpt-5";
/// `gpt-5-mini` completion model
pub const GPT_5_MINI: &str = "gpt-5-mini";
/// `gpt-5-nano` completion model
pub const GPT_5_NANO: &str = "gpt-5-nano";

/// `gpt-4.5-preview` completion model
pub const GPT_4_5_PREVIEW: &str = "gpt-4.5-preview";
/// `gpt-4.5-preview-2025-02-27` completion model
pub const GPT_4_5_PREVIEW_2025_02_27: &str = "gpt-4.5-preview-2025-02-27";
/// `gpt-4o-2024-11-20` completion model (this is newer than 4o)
pub const GPT_4O_2024_11_20: &str = "gpt-4o-2024-11-20";
/// `gpt-4o` completion model
pub const GPT_4O: &str = "gpt-4o";
/// `gpt-4o-mini` completion model
pub const GPT_4O_MINI: &str = "gpt-4o-mini";
/// `gpt-4o-2024-05-13` completion model
pub const GPT_4O_2024_05_13: &str = "gpt-4o-2024-05-13";
/// `gpt-4-turbo` completion model
pub const GPT_4_TURBO: &str = "gpt-4-turbo";
/// `gpt-4-turbo-2024-04-09` completion model
pub const GPT_4_TURBO_2024_04_09: &str = "gpt-4-turbo-2024-04-09";
/// `gpt-4-turbo-preview` completion model
pub const GPT_4_TURBO_PREVIEW: &str = "gpt-4-turbo-preview";
/// `gpt-4-0125-preview` completion model
pub const GPT_4_0125_PREVIEW: &str = "gpt-4-0125-preview";
/// `gpt-4-1106-preview` completion model
pub const GPT_4_1106_PREVIEW: &str = "gpt-4-1106-preview";
/// `gpt-4-vision-preview` completion model
pub const GPT_4_VISION_PREVIEW: &str = "gpt-4-vision-preview";
/// `gpt-4-1106-vision-preview` completion model
pub const GPT_4_1106_VISION_PREVIEW: &str = "gpt-4-1106-vision-preview";
/// `gpt-4` completion model
pub const GPT_4: &str = "gpt-4";
/// `gpt-4-0613` completion model
pub const GPT_4_0613: &str = "gpt-4-0613";
/// `gpt-4-32k` completion model
pub const GPT_4_32K: &str = "gpt-4-32k";
/// `gpt-4-32k-0613` completion model
pub const GPT_4_32K_0613: &str = "gpt-4-32k-0613";

/// `o4-mini-2025-04-16` completion model
pub const O4_MINI_2025_04_16: &str = "o4-mini-2025-04-16";
/// `o4-mini` completion model
pub const O4_MINI: &str = "o4-mini";
/// `o3` completion model
pub const O3: &str = "o3";
/// `o3-mini` completion model
pub const O3_MINI: &str = "o3-mini";
/// `o3-mini-2025-01-31` completion model
pub const O3_MINI_2025_01_31: &str = "o3-mini-2025-01-31";
/// `o1-pro` completion model
pub const O1_PRO: &str = "o1-pro";
/// `o1` completion model
pub const O1: &str = "o1";
/// `o1-2024-12-17` completion model
pub const O1_2024_12_17: &str = "o1-2024-12-17";
/// `o1-preview` completion model
pub const O1_PREVIEW: &str = "o1-preview";
/// `o1-preview-2024-09-12` completion model
pub const O1_PREVIEW_2024_09_12: &str = "o1-preview-2024-09-12";
/// `o1-mini` completion model
pub const O1_MINI: &str = "o1-mini";
/// `o1-mini-2024-09-12` completion model
pub const O1_MINI_2024_09_12: &str = "o1-mini-2024-09-12";

/// `gpt-4.1-mini` completion model
pub const GPT_4_1_MINI: &str = "gpt-4.1-mini";
/// `gpt-4.1-nano` completion model
pub const GPT_4_1_NANO: &str = "gpt-4.1-nano";
/// `gpt-4.1-2025-04-14` completion model
pub const GPT_4_1_2025_04_14: &str = "gpt-4.1-2025-04-14";
/// `gpt-4.1` completion model
pub const GPT_4_1: &str = "gpt-4.1";
128
129impl From<ApiErrorResponse> for CompletionError {
130    fn from(err: ApiErrorResponse) -> Self {
131        CompletionError::ProviderError(err.message)
132    }
133}
134
/// A chat message in the OpenAI chat-completions wire format, tagged by `role`.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    /// System instructions; also deserializes from the newer `developer` role.
    #[serde(alias = "developer")]
    System {
        // Accepts either a bare string or an array of content parts on input.
        #[serde(deserialize_with = "string_or_one_or_many")]
        content: OneOrMany<SystemContent>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    /// A user turn: text, images, and/or audio.
    User {
        // Deserializes from string or array; serializes back to a bare string
        // when there's a single text part (see `serialize_user_content`).
        #[serde(
            deserialize_with = "string_or_one_or_many",
            serialize_with = "serialize_user_content"
        )]
        content: OneOrMany<UserContent>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    /// An assistant turn: text/refusal parts plus any tool calls.
    Assistant {
        #[serde(
            default,
            deserialize_with = "json_utils::string_or_vec",
            skip_serializing_if = "Vec::is_empty",
            serialize_with = "serialize_assistant_content_vec"
        )]
        content: Vec<AssistantContent>,
        // OpenAI-compatible providers expose hidden reasoning on this non-standard
        // field, and some require it to be echoed back on assistant tool-call turns.
        #[serde(skip_serializing_if = "Option::is_none", rename = "reasoning_content")]
        reasoning: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        refusal: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        audio: Option<AudioAssistant>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        // Some providers send `tool_calls: null`; treat that as an empty list.
        #[serde(
            default,
            deserialize_with = "json_utils::null_or_vec",
            skip_serializing_if = "Vec::is_empty"
        )]
        tool_calls: Vec<ToolCall>,
    },
    /// The result of a prior tool call, echoed back under the `tool` role.
    #[serde(rename = "tool")]
    ToolResult {
        tool_call_id: String,
        content: ToolResultContentValue,
    },
}
185
186impl Message {
187    pub fn system(content: &str) -> Self {
188        Message::System {
189            content: OneOrMany::one(content.to_owned().into()),
190            name: None,
191        }
192    }
193}
194
195fn history_contains_tool_result(messages: &[Message]) -> bool {
196    messages
197        .iter()
198        .any(|message| matches!(message, Message::ToolResult { .. }))
199}
200
/// Reference to a previously generated assistant audio response (by id).
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AudioAssistant {
    pub id: String,
}

/// A single text segment of a system message.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    // Defaults to `text` when the field is omitted.
    #[serde(default)]
    pub r#type: SystemContentType,
    pub text: String,
}

/// Content type tag for system messages; only `text` exists.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    #[default]
    Text,
}

/// Assistant message content part: either normal text or a model refusal.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum AssistantContent {
    Text { text: String },
    Refusal { refusal: String },
}
226
227impl From<AssistantContent> for completion::AssistantContent {
228    fn from(value: AssistantContent) -> Self {
229        match value {
230            AssistantContent::Text { text } => completion::AssistantContent::text(text),
231            AssistantContent::Refusal { refusal } => completion::AssistantContent::text(refusal),
232        }
233    }
234}
235
/// User message content part in the OpenAI wire format.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum UserContent {
    Text {
        text: String,
    },
    /// An image referenced by URL (or an inline data URI).
    #[serde(rename = "image_url")]
    Image {
        image_url: ImageUrl,
    },
    /// Inline base64 audio input.
    Audio {
        input_audio: InputAudio,
    },
}

/// Image reference plus OpenAI's requested processing detail level.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ImageUrl {
    pub url: String,
    #[serde(default)]
    pub detail: ImageDetail,
}

/// Base64-encoded audio payload and its format.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct InputAudio {
    pub data: String,
    pub format: AudioMediaType,
}

/// One text part of a tool result, in the array representation.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolResultContent {
    #[serde(default)]
    r#type: ToolResultContentType,
    pub text: String,
}

/// Content type tag for tool results; only `text` exists.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolResultContentType {
    #[default]
    Text,
}
277
278impl FromStr for ToolResultContent {
279    type Err = Infallible;
280
281    fn from_str(s: &str) -> Result<Self, Self::Err> {
282        Ok(s.to_owned().into())
283    }
284}
285
286impl From<String> for ToolResultContent {
287    fn from(s: String) -> Self {
288        ToolResultContent {
289            r#type: ToolResultContentType::default(),
290            text: s,
291        }
292    }
293}
294
295#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
296#[serde(untagged)]
297pub enum ToolResultContentValue {
298    Array(Vec<ToolResultContent>),
299    String(String),
300}
301
302impl ToolResultContentValue {
303    pub fn from_string(s: String, use_array_format: bool) -> Self {
304        if use_array_format {
305            ToolResultContentValue::Array(vec![ToolResultContent::from(s)])
306        } else {
307            ToolResultContentValue::String(s)
308        }
309    }
310
311    pub fn as_text(&self) -> String {
312        match self {
313            ToolResultContentValue::Array(arr) => arr
314                .iter()
315                .map(|c| c.text.clone())
316                .collect::<Vec<_>>()
317                .join("\n"),
318            ToolResultContentValue::String(s) => s.clone(),
319        }
320    }
321
322    pub fn to_array(&self) -> Self {
323        match self {
324            ToolResultContentValue::Array(_) => self.clone(),
325            ToolResultContentValue::String(s) => {
326                ToolResultContentValue::Array(vec![ToolResultContent::from(s.clone())])
327            }
328        }
329    }
330}
331
/// A tool invocation emitted by the assistant.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    pub id: String,
    // Defaults to `function`, the only tool type defined here.
    #[serde(default)]
    pub r#type: ToolType,
    pub function: Function,
}

/// Kind of tool being called; only `function` is supported.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    #[default]
    Function,
}

/// Function definition for a tool, with optional strict mode
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct FunctionDefinition {
    pub name: String,
    pub description: String,
    // JSON Schema describing the function's parameters.
    pub parameters: serde_json::Value,
    // `Some(true)` requests exact schema enforcement; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub strict: Option<bool>,
}

/// A tool entry as sent in the `tools` array of a completion request.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ToolDefinition {
    // Set to "function" by the `From<completion::ToolDefinition>` impl below.
    pub r#type: String,
    pub function: FunctionDefinition,
}
362
363impl From<completion::ToolDefinition> for ToolDefinition {
364    fn from(tool: completion::ToolDefinition) -> Self {
365        Self {
366            r#type: "function".into(),
367            function: FunctionDefinition {
368                name: tool.name,
369                description: tool.description,
370                parameters: tool.parameters,
371                strict: None,
372            },
373        }
374    }
375}
376
impl ToolDefinition {
    /// Apply strict mode to this tool definition.
    /// This sets `strict: true` and sanitizes the schema to meet OpenAI requirements.
    pub fn with_strict(mut self) -> Self {
        self.function.strict = Some(true);
        // Strict mode constrains the accepted JSON Schema; rewrite it in place.
        super::sanitize_schema(&mut self.function.parameters);
        self
    }
}
386
/// Tool-choice modes accepted by the OpenAI chat API (`auto`/`none`/`required`).
#[derive(Default, Clone, Debug, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ToolChoice {
    #[default]
    Auto,
    None,
    Required,
}
395
396impl TryFrom<crate::message::ToolChoice> for ToolChoice {
397    type Error = CompletionError;
398    fn try_from(value: crate::message::ToolChoice) -> Result<Self, Self::Error> {
399        let res = match value {
400            message::ToolChoice::Specific { .. } => {
401                return Err(CompletionError::ProviderError(
402                    "Provider doesn't support only using specific tools".to_string(),
403                ));
404            }
405            message::ToolChoice::Auto => Self::Auto,
406            message::ToolChoice::None => Self::None,
407            message::ToolChoice::Required => Self::Required,
408        };
409
410        Ok(res)
411    }
412}
413
/// Name/arguments pair for a function-type tool call.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    pub name: String,
    // OpenAI transmits arguments as a JSON-encoded string; these helpers
    // serialize to that form and tolerate either form on deserialization.
    #[serde(
        serialize_with = "json_utils::stringified_json::serialize",
        deserialize_with = "json_utils::stringified_json::deserialize_maybe_stringified"
    )]
    pub arguments: serde_json::Value,
}
423
424impl TryFrom<message::ToolResult> for Message {
425    type Error = message::MessageError;
426
427    fn try_from(value: message::ToolResult) -> Result<Self, Self::Error> {
428        let text = value
429            .content
430            .into_iter()
431            .map(|content| {
432                match content {
433                message::ToolResultContent::Text(message::Text { text }) => Ok(text),
434                message::ToolResultContent::Image(_) => Err(message::MessageError::ConversionError(
435                    "OpenAI does not support images in tool results. Tool results must be text."
436                        .into(),
437                )),
438            }
439            })
440            .collect::<Result<Vec<_>, _>>()?
441            .join("\n");
442
443        Ok(Message::ToolResult {
444            tool_call_id: value.id,
445            content: ToolResultContentValue::String(text),
446        })
447    }
448}
449
impl TryFrom<message::UserContent> for UserContent {
    type Error = message::MessageError;

    /// Lowers a single piece of core user content into the OpenAI wire format.
    ///
    /// Supported: text, image (URL or base64 data URI), document (flattened to
    /// text), and base64 audio. All other shapes are conversion errors.
    fn try_from(value: message::UserContent) -> Result<Self, Self::Error> {
        match value {
            message::UserContent::Text(message::Text { text }) => Ok(UserContent::Text { text }),
            message::UserContent::Image(message::Image {
                data,
                detail,
                media_type,
                ..
            }) => match data {
                // Remote images pass through; a missing detail falls back to the default.
                DocumentSourceKind::Url(url) => Ok(UserContent::Image {
                    image_url: ImageUrl {
                        url,
                        detail: detail.unwrap_or_default(),
                    },
                }),
                // Inline images become data URIs; here both a media type and an
                // explicit detail level are required.
                DocumentSourceKind::Base64(data) => {
                    let url = format!(
                        "data:{};base64,{}",
                        media_type.map(|i| i.to_mime_type()).ok_or(
                            message::MessageError::ConversionError(
                                "OpenAI Image URI must have media type".into()
                            )
                        )?,
                        data
                    );

                    let detail = detail.ok_or(message::MessageError::ConversionError(
                        "OpenAI image URI must have image detail".into(),
                    ))?;

                    Ok(UserContent::Image {
                        image_url: ImageUrl { url, detail },
                    })
                }
                DocumentSourceKind::Raw(_) => Err(message::MessageError::ConversionError(
                    "Raw files not supported, encode as base64 first".into(),
                )),
                DocumentSourceKind::Unknown => Err(message::MessageError::ConversionError(
                    "Document has no body".into(),
                )),
                // Any other source kind (e.g. plain string) is not a valid image body.
                doc => Err(message::MessageError::ConversionError(format!(
                    "Unsupported document type: {doc:?}"
                ))),
            },
            // Documents are flattened to their textual payload.
            message::UserContent::Document(message::Document { data, .. }) => {
                if let DocumentSourceKind::Base64(text) | DocumentSourceKind::String(text) = data {
                    Ok(UserContent::Text { text })
                } else {
                    Err(message::MessageError::ConversionError(
                        "Documents must be base64 or a string".into(),
                    ))
                }
            }
            message::UserContent::Audio(message::Audio {
                data, media_type, ..
            }) => match data {
                // Only inline base64 audio is accepted; MP3 is assumed when the
                // media type is absent.
                DocumentSourceKind::Base64(data) => Ok(UserContent::Audio {
                    input_audio: InputAudio {
                        data,
                        format: match media_type {
                            Some(media_type) => media_type,
                            None => AudioMediaType::MP3,
                        },
                    },
                }),
                DocumentSourceKind::Url(_) => Err(message::MessageError::ConversionError(
                    "URLs are not supported for audio".into(),
                )),
                DocumentSourceKind::Raw(_) => Err(message::MessageError::ConversionError(
                    "Raw files are not supported for audio".into(),
                )),
                DocumentSourceKind::Unknown => Err(message::MessageError::ConversionError(
                    "Audio has no body".into(),
                )),
                audio => Err(message::MessageError::ConversionError(format!(
                    "Unsupported audio type: {audio:?}"
                ))),
            },
            // Tool results are handled at the message level, not as user content parts.
            message::UserContent::ToolResult(_) => Err(message::MessageError::ConversionError(
                "Tool result is in unsupported format".into(),
            )),
            message::UserContent::Video(_) => Err(message::MessageError::ConversionError(
                "Video is in unsupported format".into(),
            )),
        }
    }
}
540
impl TryFrom<OneOrMany<message::UserContent>> for Vec<Message> {
    type Error = message::MessageError;

    /// Splits a core user turn into OpenAI messages: tool results each become a
    /// separate `tool` role message; everything else folds into one user message.
    fn try_from(value: OneOrMany<message::UserContent>) -> Result<Self, Self::Error> {
        let (tool_results, other_content): (Vec<_>, Vec<_>) = value
            .into_iter()
            .partition(|content| matches!(content, message::UserContent::ToolResult(_)));

        // If there are messages with both tool results and user content, openai will only
        //  handle tool results. It's unlikely that there will be both.
        if !tool_results.is_empty() {
            tool_results
                .into_iter()
                .map(|content| match content {
                    message::UserContent::ToolResult(tool_result) => tool_result.try_into(),
                    // Unreachable in practice: the partition above guarantees
                    // only tool results end up in this branch.
                    _ => Err(message::MessageError::ConversionError(
                        "expected tool result content while converting OpenAI input".into(),
                    )),
                })
                .collect::<Result<Vec<_>, _>>()
        } else {
            let other_content: Vec<UserContent> = other_content
                .into_iter()
                .map(|content| content.try_into())
                .collect::<Result<Vec<_>, _>>()?;

            // `OneOrMany::many` only fails on an empty collection.
            let other_content = OneOrMany::many(other_content).map_err(|_| {
                message::MessageError::ConversionError(
                    "OpenAI user message did not contain any non-tool content".into(),
                )
            })?;

            Ok(vec![Message::User {
                content: other_content,
                name: None,
            }])
        }
    }
}
580
impl TryFrom<OneOrMany<message::AssistantContent>> for Vec<Message> {
    type Error = message::MessageError;

    /// Collapses a core assistant turn into at most one OpenAI assistant message,
    /// gathering text parts, tool calls, and concatenated reasoning text.
    fn try_from(value: OneOrMany<message::AssistantContent>) -> Result<Self, Self::Error> {
        let mut text_content = Vec::new();
        let mut tool_calls = Vec::new();
        let mut reasoning_text = String::new();

        for content in value {
            match content {
                message::AssistantContent::Text(text) => text_content.push(text),
                message::AssistantContent::ToolCall(tool_call) => tool_calls.push(tool_call),
                message::AssistantContent::Reasoning(reasoning) => {
                    reasoning_text.push_str(&reasoning.display_text());
                }
                message::AssistantContent::Image(_) => {
                    return Err(message::MessageError::ConversionError(
                        "OpenAI assistant messages do not support image content in chat completions"
                            .into(),
                    ));
                }
            }
        }

        // NOTE(review): a reasoning-only turn (no text, no tool calls) produces no
        // message at all, silently dropping the reasoning — confirm this is intended.
        if text_content.is_empty() && tool_calls.is_empty() {
            return Ok(vec![]);
        }

        Ok(vec![Message::Assistant {
            content: text_content
                .into_iter()
                .map(|content| content.text.into())
                .collect::<Vec<_>>(),
            reasoning: if reasoning_text.is_empty() {
                None
            } else {
                Some(reasoning_text)
            },
            refusal: None,
            audio: None,
            name: None,
            tool_calls: tool_calls
                .into_iter()
                .map(|tool_call| tool_call.into())
                .collect::<Vec<_>>(),
        }])
    }
}
629
630impl TryFrom<message::Message> for Vec<Message> {
631    type Error = message::MessageError;
632
633    fn try_from(message: message::Message) -> Result<Self, Self::Error> {
634        match message {
635            message::Message::System { content } => Ok(vec![Message::system(&content)]),
636            message::Message::User { content } => content.try_into(),
637            message::Message::Assistant { content, .. } => content.try_into(),
638        }
639    }
640}
641
642impl From<message::ToolCall> for ToolCall {
643    fn from(tool_call: message::ToolCall) -> Self {
644        Self {
645            id: tool_call.id,
646            r#type: ToolType::default(),
647            function: Function {
648                name: tool_call.function.name,
649                arguments: tool_call.function.arguments,
650            },
651        }
652    }
653}
654
655impl From<ToolCall> for message::ToolCall {
656    fn from(tool_call: ToolCall) -> Self {
657        Self {
658            id: tool_call.id,
659            call_id: None,
660            function: message::ToolFunction {
661                name: tool_call.function.name,
662                arguments: tool_call.function.arguments,
663            },
664            signature: None,
665            additional_params: None,
666        }
667    }
668}
669
impl TryFrom<Message> for message::Message {
    type Error = message::MessageError;

    /// Raises an OpenAI wire message back into the core message model.
    fn try_from(message: Message) -> Result<Self, Self::Error> {
        Ok(match message {
            Message::User { content, .. } => message::Message::User {
                content: content.map(|content| content.into()),
            },
            Message::Assistant {
                content,
                tool_calls,
                reasoning,
                ..
            } => {
                let mut assistant_content = Vec::new();

                // Non-empty reasoning leads the content list so it precedes the
                // visible text and tool calls.
                if let Some(reasoning) = reasoning
                    && !reasoning.is_empty()
                {
                    assistant_content.push(message::AssistantContent::reasoning(reasoning));
                }

                // Refusals are folded into plain text alongside normal text parts.
                assistant_content.extend(content.into_iter().map(|content| match content {
                    AssistantContent::Text { text } => message::AssistantContent::text(text),
                    AssistantContent::Refusal { refusal } => {
                        message::AssistantContent::text(refusal)
                    }
                }));

                assistant_content.extend(
                    tool_calls
                        .into_iter()
                        .map(|tool_call| Ok(message::AssistantContent::ToolCall(tool_call.into())))
                        .collect::<Result<Vec<_>, _>>()?,
                );

                message::Message::Assistant {
                    id: None,
                    // `many` fails only when every content source above was empty.
                    content: OneOrMany::many(assistant_content).map_err(|_| {
                        message::MessageError::ConversionError(
                            "Neither `content` nor `tool_calls` was provided to the Message"
                                .to_owned(),
                        )
                    })?,
                }
            }

            // Tool results round-trip as user-side tool-result content.
            Message::ToolResult {
                tool_call_id,
                content,
            } => message::Message::User {
                content: OneOrMany::one(message::UserContent::tool_result(
                    tool_call_id,
                    OneOrMany::one(message::ToolResultContent::text(content.as_text())),
                )),
            },

            // System messages should get stripped out when converting messages, this is just a
            // stop gap to avoid obnoxious error handling or panic occurring.
            Message::System { content, .. } => message::Message::User {
                content: content.map(|content| message::UserContent::text(content.text)),
            },
        })
    }
}
735
736impl From<UserContent> for message::UserContent {
737    fn from(content: UserContent) -> Self {
738        match content {
739            UserContent::Text { text } => message::UserContent::text(text),
740            UserContent::Image { image_url } => {
741                message::UserContent::image_url(image_url.url, None, Some(image_url.detail))
742            }
743            UserContent::Audio { input_audio } => {
744                message::UserContent::audio(input_audio.data, Some(input_audio.format))
745            }
746        }
747    }
748}
749
750impl From<String> for UserContent {
751    fn from(s: String) -> Self {
752        UserContent::Text { text: s }
753    }
754}
755
756impl FromStr for UserContent {
757    type Err = Infallible;
758
759    fn from_str(s: &str) -> Result<Self, Self::Err> {
760        Ok(UserContent::Text {
761            text: s.to_string(),
762        })
763    }
764}
765
766impl From<String> for AssistantContent {
767    fn from(s: String) -> Self {
768        AssistantContent::Text { text: s }
769    }
770}
771
772impl FromStr for AssistantContent {
773    type Err = Infallible;
774
775    fn from_str(s: &str) -> Result<Self, Self::Err> {
776        Ok(AssistantContent::Text {
777            text: s.to_string(),
778        })
779    }
780}
781impl From<String> for SystemContent {
782    fn from(s: String) -> Self {
783        SystemContent {
784            r#type: SystemContentType::default(),
785            text: s,
786        }
787    }
788}
789
790impl FromStr for SystemContent {
791    type Err = Infallible;
792
793    fn from_str(s: &str) -> Result<Self, Self::Err> {
794        Ok(SystemContent {
795            r#type: SystemContentType::default(),
796            text: s.to_string(),
797        })
798    }
799}
800
/// Raw chat-completions response body as returned by the OpenAI API.
#[derive(Debug, Deserialize, Serialize)]
pub struct CompletionResponse {
    pub id: String,
    pub object: String,
    // Unix timestamp of response creation.
    pub created: u64,
    pub model: String,
    pub system_fingerprint: Option<String>,
    pub choices: Vec<Choice>,
    // Absent on some OpenAI-compatible providers.
    pub usage: Option<Usage>,
}
811
impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
    type Error = CompletionError;

    /// Converts the raw provider response into the core completion response,
    /// using only the first choice.
    fn try_from(response: CompletionResponse) -> Result<Self, Self::Error> {
        let choice = response.choices.first().ok_or_else(|| {
            CompletionError::ResponseError("Response contained no choices".to_owned())
        })?;

        let content = match &choice.message {
            Message::Assistant {
                content,
                tool_calls,
                reasoning,
                ..
            } => {
                // Empty text/refusal parts are dropped; refusals become plain text.
                let mut content = content
                    .iter()
                    .filter_map(|c| {
                        let s = match c {
                            AssistantContent::Text { text } => text,
                            AssistantContent::Refusal { refusal } => refusal,
                        };
                        if s.is_empty() {
                            None
                        } else {
                            Some(completion::AssistantContent::text(s))
                        }
                    })
                    .collect::<Vec<_>>();

                if let Some(reasoning) = reasoning {
                    // llama.cpp exposes hidden reasoning on a separate non-standard field.
                    // Keep it structured here so the non-streaming path matches streaming
                    // behavior and does not pollute plain-text response surfaces.
                    content.push(completion::AssistantContent::reasoning(reasoning));
                }

                content.extend(
                    tool_calls
                        .iter()
                        .map(|call| {
                            completion::AssistantContent::tool_call(
                                &call.id,
                                &call.function.name,
                                call.function.arguments.clone(),
                            )
                        })
                        .collect::<Vec<_>>(),
                );
                Ok(content)
            }
            _ => Err(CompletionError::ResponseError(
                "Response did not contain a valid message or tool call".into(),
            )),
        }?;

        // `many` fails only on an empty collection, i.e. the choice carried nothing.
        let choice = OneOrMany::many(content).map_err(|_| {
            CompletionError::ResponseError(
                "Response contained no message or tool call (empty)".to_owned(),
            )
        })?;

        // Output tokens are derived as total - prompt rather than read directly;
        // NOTE(review): this assumes total_tokens >= prompt_tokens on every
        // provider — confirm against the `Usage` type's guarantees.
        let usage = response
            .usage
            .as_ref()
            .map(|usage| completion::Usage {
                input_tokens: usage.prompt_tokens as u64,
                output_tokens: (usage.total_tokens - usage.prompt_tokens) as u64,
                total_tokens: usage.total_tokens as u64,
                cached_input_tokens: usage
                    .prompt_tokens_details
                    .as_ref()
                    .map(|d| d.cached_tokens as u64)
                    .unwrap_or(0),
                cache_creation_input_tokens: 0,
            })
            .unwrap_or_default();

        Ok(completion::CompletionResponse {
            choice,
            usage,
            raw_response: response,
            message_id: None,
        })
    }
}
898
899impl ProviderResponseExt for CompletionResponse {
900    type OutputMessage = Choice;
901    type Usage = Usage;
902
903    fn get_response_id(&self) -> Option<String> {
904        Some(self.id.to_owned())
905    }
906
907    fn get_response_model_name(&self) -> Option<String> {
908        Some(self.model.to_owned())
909    }
910
911    fn get_output_messages(&self) -> Vec<Self::OutputMessage> {
912        self.choices.clone()
913    }
914
915    fn get_text_response(&self) -> Option<String> {
916        let response = self
917            .choices
918            .iter()
919            .filter_map(|choice| assistant_message_text_response(&choice.message))
920            .collect::<Vec<_>>()
921            .join("\n");
922
923        if response.is_empty() {
924            None
925        } else {
926            Some(response)
927        }
928    }
929
930    fn get_usage(&self) -> Option<Self::Usage> {
931        self.usage.clone()
932    }
933}
934
935fn assistant_message_text_response(message: &Message) -> Option<String> {
936    let Message::Assistant {
937        content, refusal, ..
938    } = message
939    else {
940        return None;
941    };
942
943    let mut segments = content
944        .iter()
945        .filter_map(|content| match content {
946            AssistantContent::Text { text } => (!text.is_empty()).then(|| text.clone()),
947            AssistantContent::Refusal { refusal } => (!refusal.is_empty()).then(|| refusal.clone()),
948        })
949        .collect::<Vec<_>>();
950
951    if segments.is_empty()
952        && let Some(refusal) = refusal.as_ref().filter(|refusal| !refusal.is_empty())
953    {
954        segments.push(refusal.clone());
955    }
956
957    if segments.is_empty() {
958        None
959    } else {
960        Some(segments.join("\n"))
961    }
962}
963
/// A single completion choice returned by the Chat Completions API.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Choice {
    /// Zero-based position of this choice in the response's `choices` array.
    pub index: usize,
    /// The assistant message produced for this choice.
    pub message: Message,
    /// Raw log-probability payload when requested; left untyped.
    pub logprobs: Option<serde_json::Value>,
    /// Why generation stopped (e.g. `"stop"`).
    pub finish_reason: String,
}
971
/// Breakdown of prompt-token usage, nested inside [`Usage`].
#[derive(Clone, Debug, Deserialize, Serialize, Default)]
pub struct PromptTokensDetails {
    /// Cached tokens from prompt caching
    // Defaults to 0 when the provider omits the field.
    #[serde(default)]
    pub cached_tokens: usize,
}
978
/// Token accounting reported by the provider.
///
/// Completion tokens are not modeled directly; callers in this module derive
/// them as `total_tokens - prompt_tokens`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Usage {
    /// Tokens consumed by the prompt (input side).
    pub prompt_tokens: usize,
    /// Total tokens for the round trip (prompt plus completion).
    pub total_tokens: usize,
    /// Extra prompt-token detail (e.g. cached tokens); omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<PromptTokensDetails>,
}
986
987impl Usage {
988    pub fn new() -> Self {
989        Self {
990            prompt_tokens: 0,
991            total_tokens: 0,
992            prompt_tokens_details: None,
993        }
994    }
995}
996
997impl Default for Usage {
998    fn default() -> Self {
999        Self::new()
1000    }
1001}
1002
1003impl fmt::Display for Usage {
1004    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1005        let Usage {
1006            prompt_tokens,
1007            total_tokens,
1008            ..
1009        } = self;
1010        write!(
1011            f,
1012            "Prompt tokens: {prompt_tokens} Total tokens: {total_tokens}"
1013        )
1014    }
1015}
1016
1017impl GetTokenUsage for Usage {
1018    fn token_usage(&self) -> Option<crate::completion::Usage> {
1019        Some(crate::providers::internal::completion_usage(
1020            self.prompt_tokens as u64,
1021            (self.total_tokens - self.prompt_tokens) as u64,
1022            self.total_tokens as u64,
1023            self.prompt_tokens_details
1024                .as_ref()
1025                .map(|d| d.cached_tokens as u64)
1026                .unwrap_or(0),
1027        ))
1028    }
1029}
1030
#[doc(hidden)]
#[derive(Clone)]
/// Completion model generic over the provider extension (`Ext`) and the
/// underlying HTTP client implementation (`H`).
pub struct GenericCompletionModel<Ext = super::OpenAICompletionsExt, H = reqwest::Client> {
    // Shared provider client used to build and send requests.
    pub(crate) client: crate::client::Client<Ext, H>,
    /// Model identifier sent as the `model` field of each request.
    pub model: String,
    /// When true, tool schemas are sanitized for OpenAI strict mode.
    pub strict_tools: bool,
    /// When true, tool-result message content is converted to its array form.
    pub tool_result_array_content: bool,
}
1039
/// The completion model struct for OpenAI's Chat Completions API.
///
/// This preserves the historical public generic shape where the first generic
/// parameter is the HTTP client type.
///
/// Equivalent to [`GenericCompletionModel`] with the extension fixed to
/// [`super::OpenAICompletionsExt`].
pub type CompletionModel<H = reqwest::Client> =
    GenericCompletionModel<super::OpenAICompletionsExt, H>;
1046
1047impl<Ext, H> GenericCompletionModel<Ext, H>
1048where
1049    crate::client::Client<Ext, H>: std::fmt::Debug + Clone + 'static,
1050    Ext: crate::client::Provider + Clone + 'static,
1051{
1052    pub fn new(client: crate::client::Client<Ext, H>, model: impl Into<String>) -> Self {
1053        Self {
1054            client,
1055            model: model.into(),
1056            strict_tools: false,
1057            tool_result_array_content: false,
1058        }
1059    }
1060
1061    pub fn with_model(client: crate::client::Client<Ext, H>, model: &str) -> Self {
1062        Self {
1063            client,
1064            model: model.into(),
1065            strict_tools: false,
1066            tool_result_array_content: false,
1067        }
1068    }
1069
1070    /// Enable strict mode for tool schemas.
1071    ///
1072    /// When enabled, tool schemas are automatically sanitized to meet OpenAI's strict mode requirements:
1073    /// - `additionalProperties: false` is added to all objects
1074    /// - All properties are marked as required
1075    /// - `strict: true` is set on each function definition
1076    ///
1077    /// This allows OpenAI to guarantee that the model's tool calls will match the schema exactly.
1078    pub fn with_strict_tools(mut self) -> Self {
1079        self.strict_tools = true;
1080        self
1081    }
1082
1083    pub fn with_tool_result_array_content(mut self) -> Self {
1084        self.tool_result_array_content = true;
1085        self
1086    }
1087}
1088
/// Request body for OpenAI's `/chat/completions` endpoint.
///
/// `additional_params` is flattened into the top-level JSON object, so extra
/// provider options (e.g. `response_format`) serialize as siblings of the
/// typed fields.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CompletionRequest {
    /// Model identifier sent to the provider.
    model: String,
    /// Full conversation history; system message first when present.
    messages: Vec<Message>,
    /// Tool definitions; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<ToolDefinition>,
    /// Optional tool-choice directive; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<ToolChoice>,
    /// Sampling temperature; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    /// Maximum number of tokens to generate; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u64>,
    /// Extra provider parameters merged into the top-level object.
    #[serde(flatten)]
    additional_params: Option<serde_json::Value>,
}
1104
/// Inputs for building a [`CompletionRequest`] from rig's generic request.
pub struct OpenAIRequestParams {
    /// Default model id; overridden by `request.model` when set.
    pub model: String,
    /// The provider-agnostic completion request to convert.
    pub request: CoreCompletionRequest,
    /// Sanitize tool schemas for OpenAI strict mode.
    pub strict_tools: bool,
    /// Convert tool-result message content to its array form.
    pub tool_result_array_content: bool,
}
1111
impl TryFrom<OpenAIRequestParams> for CompletionRequest {
    type Error = CompletionError;

    /// Converts rig's generic completion request into the OpenAI Chat
    /// Completions request shape.
    ///
    /// # Errors
    ///
    /// Returns [`CompletionError::RequestError`] when no provider-compatible
    /// messages remain after conversion, and propagates any message or
    /// tool-choice conversion failure.
    fn try_from(params: OpenAIRequestParams) -> Result<Self, Self::Error> {
        let OpenAIRequestParams {
            model,
            request: req,
            strict_tools,
            tool_result_array_content,
        } = params;

        // Normalized documents go first so they precede the chat history.
        let mut partial_history = vec![];
        if let Some(docs) = req.normalized_documents() {
            partial_history.push(docs);
        }
        let CoreCompletionRequest {
            model: request_model,
            preamble,
            chat_history,
            tools,
            temperature,
            max_tokens,
            additional_params,
            tool_choice,
            output_schema,
            ..
        } = req;

        partial_history.extend(chat_history);

        // The preamble, when present, becomes the leading system message.
        let mut full_history: Vec<Message> =
            preamble.map_or_else(Vec::new, |preamble| vec![Message::system(&preamble)]);

        // A single rig message may convert into multiple provider messages,
        // hence the Vec<Vec<_>> collect followed by flatten.
        full_history.extend(
            partial_history
                .into_iter()
                .map(message::Message::try_into)
                .collect::<Result<Vec<Vec<Message>>, _>>()?
                .into_iter()
                .flatten()
                .collect::<Vec<_>>(),
        );

        // Conversion can drop messages (e.g. reasoning-only content); an
        // empty history would be an invalid provider request.
        if full_history.is_empty() {
            return Err(CompletionError::RequestError(
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "OpenAI Chat Completions request has no provider-compatible messages after conversion",
                )
                .into(),
            ));
        }

        // Optionally normalize tool-result content to array form.
        if tool_result_array_content {
            for msg in &mut full_history {
                if let Message::ToolResult { content, .. } = msg {
                    *content = content.to_array();
                }
            }
        }

        let history_has_tool_result = history_contains_tool_result(&full_history);

        let tool_choice = tool_choice.map(ToolChoice::try_from).transpose()?;

        // Apply strict-mode sanitization to each tool schema when requested.
        let tools: Vec<ToolDefinition> = tools
            .into_iter()
            .map(|tool| {
                let def = ToolDefinition::from(tool);
                if strict_tools { def.with_strict() } else { def }
            })
            .collect();

        // Some OpenAI-compatible backends such as llama.cpp will skip tool execution
        // if `response_format` is sent on the first turn alongside tools. Delay the
        // schema until after the conversation contains a tool result.
        let should_apply_response_format =
            output_schema.is_some() && (tools.is_empty() || history_has_tool_result);

        // Map output_schema to OpenAI's response_format and merge into additional_params
        let additional_params = if let Some(schema) = output_schema
            && should_apply_response_format
        {
            // Prefer the schema's `title` as the response-format name.
            let name = schema
                .as_object()
                .and_then(|o| o.get("title"))
                .and_then(|v| v.as_str())
                .unwrap_or("response_schema")
                .to_string();
            let mut schema_value = schema.to_value();
            super::sanitize_schema(&mut schema_value);
            let response_format = serde_json::json!({
                "response_format": {
                    "type": "json_schema",
                    "json_schema": {
                        "name": name,
                        "strict": true,
                        "schema": schema_value
                    }
                }
            });
            // NOTE(review): key-conflict precedence is defined by
            // json_utils::merge — confirm which side wins if callers also
            // set response_format.
            Some(match additional_params {
                Some(existing) => json_utils::merge(existing, response_format),
                None => response_format,
            })
        } else {
            additional_params
        };

        let res = Self {
            // A per-request model override takes precedence over the default.
            model: request_model.unwrap_or(model),
            messages: full_history,
            tools,
            tool_choice,
            temperature,
            max_tokens,
            additional_params,
        };

        Ok(res)
    }
}
1234
1235impl TryFrom<(String, CoreCompletionRequest)> for CompletionRequest {
1236    type Error = CompletionError;
1237
1238    fn try_from((model, req): (String, CoreCompletionRequest)) -> Result<Self, Self::Error> {
1239        CompletionRequest::try_from(OpenAIRequestParams {
1240            model,
1241            request: req,
1242            strict_tools: false,
1243            tool_result_array_content: false,
1244        })
1245    }
1246}
1247
1248impl crate::telemetry::ProviderRequestExt for CompletionRequest {
1249    type InputMessage = Message;
1250
1251    fn get_input_messages(&self) -> Vec<Self::InputMessage> {
1252        self.messages.clone()
1253    }
1254
1255    fn get_system_prompt(&self) -> Option<String> {
1256        let first_message = self.messages.first()?;
1257
1258        let Message::System { ref content, .. } = first_message.clone() else {
1259            return None;
1260        };
1261
1262        let SystemContent { text, .. } = content.first();
1263
1264        Some(text)
1265    }
1266
1267    fn get_prompt(&self) -> Option<String> {
1268        let last_message = self.messages.last()?;
1269
1270        let Message::User { ref content, .. } = last_message.clone() else {
1271            return None;
1272        };
1273
1274        let UserContent::Text { text } = content.first() else {
1275            return None;
1276        };
1277
1278        Some(text)
1279    }
1280
1281    fn get_model_name(&self) -> String {
1282        self.model.clone()
1283    }
1284}
1285
impl GenericCompletionModel<super::OpenAICompletionsExt, reqwest::Client> {
    /// Consumes this model and wraps it in an [`crate::agent::AgentBuilder`].
    pub fn into_agent_builder(self) -> crate::agent::AgentBuilder<Self> {
        crate::agent::AgentBuilder::new(self)
    }
}
1291
impl<Ext, H> completion::CompletionModel for GenericCompletionModel<Ext, H>
where
    crate::client::Client<Ext, H>:
        HttpClientExt + Clone + WasmCompatSend + WasmCompatSync + 'static,
    Ext: crate::client::Provider
        + crate::client::DebugExt
        + Clone
        + WasmCompatSend
        + WasmCompatSync
        + 'static,
    H: Clone + Default + std::fmt::Debug + WasmCompatSend + WasmCompatSync + 'static,
{
    type Response = CompletionResponse;
    type StreamingResponse = StreamingCompletionResponse;

    type Client = crate::client::Client<Ext, H>;

    /// Constructs a model from a shared client and a model identifier.
    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
        Self::new(client.clone(), model)
    }

    /// Sends a non-streaming chat completion request.
    ///
    /// Builds (or reuses) a telemetry span with GenAI semantic-convention
    /// fields, posts the serialized request to `/chat/completions`, and maps
    /// the provider response back into rig's generic response type.
    ///
    /// # Errors
    ///
    /// Returns `CompletionError` on request conversion, HTTP transport,
    /// JSON (de)serialization, or provider-reported failures.
    async fn completion(
        &self,
        completion_request: CoreCompletionRequest,
    ) -> Result<completion::CompletionResponse<CompletionResponse>, CompletionError> {
        // Only open a fresh span when no caller-provided span is active;
        // metadata is recorded onto whichever span ends up current.
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat",
                gen_ai.operation.name = "chat",
                gen_ai.provider.name = "openai",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = &completion_request.preamble,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = tracing::field::Empty,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        // Convert to the provider wire format, applying this model's
        // strict-tools and tool-result-content settings.
        let request = CompletionRequest::try_from(OpenAIRequestParams {
            model: self.model.to_owned(),
            request: completion_request,
            strict_tools: self.strict_tools,
            tool_result_array_content: self.tool_result_array_content,
        })?;

        // Pretty-printing is guarded so the serialization cost is only paid
        // when TRACE logging is actually enabled.
        if enabled!(Level::TRACE) {
            tracing::trace!(
                target: "rig::completions",
                "OpenAI Chat Completions completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("/chat/completions")?
            .body(body)
            .map_err(|e| CompletionError::HttpError(e.into()))?;

        // The send/parse future is instrumented with the span chosen above.
        async move {
            let response = self.client.send(req).await?;

            if response.status().is_success() {
                let text = http_client::text(response).await?;

                // A 2xx body may still carry a provider error envelope.
                match serde_json::from_str::<ApiResponse<CompletionResponse>>(&text)? {
                    ApiResponse::Ok(response) => {
                        let span = tracing::Span::current();
                        span.record_response_metadata(&response);
                        span.record_token_usage(&response.usage);

                        if enabled!(Level::TRACE) {
                            tracing::trace!(
                                target: "rig::completions",
                                "OpenAI Chat Completions completion response: {}",
                                serde_json::to_string_pretty(&response)?
                            );
                        }

                        response.try_into()
                    }
                    ApiResponse::Err(err) => Err(CompletionError::ProviderError(err.message)),
                }
            } else {
                // Non-2xx: surface the raw body as the provider error.
                let text = http_client::text(response).await?;
                Err(CompletionError::ProviderError(text))
            }
        }
        .instrument(span)
        .await
    }

    /// Delegates to the inherent streaming implementation.
    async fn stream(
        &self,
        request: CoreCompletionRequest,
    ) -> Result<
        crate::streaming::StreamingCompletionResponse<Self::StreamingResponse>,
        CompletionError,
    > {
        GenericCompletionModel::stream(self, request).await
    }
}
1401
1402fn serialize_assistant_content_vec<S>(
1403    value: &Vec<AssistantContent>,
1404    serializer: S,
1405) -> Result<S::Ok, S::Error>
1406where
1407    S: Serializer,
1408{
1409    if value.is_empty() {
1410        serializer.serialize_str("")
1411    } else {
1412        value.serialize(serializer)
1413    }
1414}
1415
1416#[cfg(test)]
1417mod tests {
1418    use super::*;
1419    use crate::telemetry::ProviderResponseExt;
1420
1421    #[test]
1422    fn test_openai_request_uses_request_model_override() {
1423        let request = crate::completion::CompletionRequest {
1424            model: Some("gpt-4.1".to_string()),
1425            preamble: None,
1426            chat_history: crate::OneOrMany::one("Hello".into()),
1427            documents: vec![],
1428            tools: vec![],
1429            temperature: None,
1430            max_tokens: None,
1431            tool_choice: None,
1432            additional_params: None,
1433            output_schema: None,
1434        };
1435
1436        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1437            model: "gpt-4o-mini".to_string(),
1438            request,
1439            strict_tools: false,
1440            tool_result_array_content: false,
1441        })
1442        .expect("request conversion should succeed");
1443        let serialized =
1444            serde_json::to_value(openai_request).expect("serialization should succeed");
1445
1446        assert_eq!(serialized["model"], "gpt-4.1");
1447    }
1448
1449    #[test]
1450    fn test_openai_request_uses_default_model_when_override_unset() {
1451        let request = crate::completion::CompletionRequest {
1452            model: None,
1453            preamble: None,
1454            chat_history: crate::OneOrMany::one("Hello".into()),
1455            documents: vec![],
1456            tools: vec![],
1457            temperature: None,
1458            max_tokens: None,
1459            tool_choice: None,
1460            additional_params: None,
1461            output_schema: None,
1462        };
1463
1464        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1465            model: "gpt-4o-mini".to_string(),
1466            request,
1467            strict_tools: false,
1468            tool_result_array_content: false,
1469        })
1470        .expect("request conversion should succeed");
1471        let serialized =
1472            serde_json::to_value(openai_request).expect("serialization should succeed");
1473
1474        assert_eq!(serialized["model"], "gpt-4o-mini");
1475    }
1476
1477    #[test]
1478    fn assistant_reasoning_alone_is_dropped() {
1479        let assistant_content = OneOrMany::one(message::AssistantContent::reasoning("hidden"));
1480
1481        let converted: Vec<Message> = assistant_content
1482            .try_into()
1483            .expect("conversion should work");
1484
1485        assert!(converted.is_empty());
1486    }
1487
1488    // Regression test: providers that serve thinking models over the OpenAI
1489    // Chat Completions schema (DeepSeek-R1, GLM-4.6, Qwen3-Thinking) return
1490    // 400 "thinking is enabled but reasoning_content is missing" on the next
1491    // turn if the prior assistant tool-call message didn't echo the reasoning.
1492    #[test]
1493    fn assistant_reasoning_is_attached_to_tool_call_message() {
1494        let assistant_content = OneOrMany::many(vec![
1495            message::AssistantContent::reasoning("hidden"),
1496            message::AssistantContent::text("visible"),
1497            message::AssistantContent::tool_call(
1498                "call_1",
1499                "subtract",
1500                serde_json::json!({"x": 2, "y": 1}),
1501            ),
1502        ])
1503        .expect("non-empty assistant content");
1504
1505        let converted: Vec<Message> = assistant_content
1506            .try_into()
1507            .expect("conversion should work");
1508        assert_eq!(converted.len(), 1);
1509
1510        match &converted[0] {
1511            Message::Assistant {
1512                content,
1513                tool_calls,
1514                reasoning,
1515                ..
1516            } => {
1517                assert_eq!(
1518                    content,
1519                    &vec![AssistantContent::Text {
1520                        text: "visible".to_string()
1521                    }]
1522                );
1523                assert_eq!(tool_calls.len(), 1);
1524                assert_eq!(tool_calls[0].id, "call_1");
1525                assert_eq!(tool_calls[0].function.name, "subtract");
1526                assert_eq!(
1527                    tool_calls[0].function.arguments,
1528                    serde_json::json!({"x": 2, "y": 1})
1529                );
1530                assert_eq!(reasoning.as_deref(), Some("hidden"));
1531            }
1532            _ => panic!("expected assistant message"),
1533        }
1534
1535        let json = serde_json::to_value(&converted[0]).expect("serialize");
1536        assert_eq!(json["reasoning_content"], "hidden");
1537    }
1538
1539    #[test]
1540    fn assistant_reasoning_roundtrips_back_to_rig_message() {
1541        let assistant = Message::Assistant {
1542            content: vec![AssistantContent::Text {
1543                text: "visible".to_string(),
1544            }],
1545            reasoning: Some("hidden".to_string()),
1546            refusal: None,
1547            audio: None,
1548            name: None,
1549            tool_calls: vec![],
1550        };
1551
1552        let rig_msg: message::Message = assistant.try_into().expect("convert back");
1553
1554        let message::Message::Assistant { content, .. } = rig_msg else {
1555            panic!("expected assistant");
1556        };
1557
1558        let items: Vec<_> = content.into_iter().collect();
1559        assert_eq!(items.len(), 2);
1560        assert!(matches!(items[0], message::AssistantContent::Reasoning(_)));
1561        assert!(matches!(items[1], message::AssistantContent::Text(_)));
1562    }
1563
1564    #[test]
1565    fn provider_response_text_response_reads_assistant_multipart_output() {
1566        let response = CompletionResponse {
1567            id: "resp_123".to_owned(),
1568            object: "chat.completion".to_owned(),
1569            created: 0,
1570            model: GPT_4O.to_owned(),
1571            system_fingerprint: None,
1572            choices: vec![Choice {
1573                index: 0,
1574                message: Message::Assistant {
1575                    content: vec![
1576                        AssistantContent::Text {
1577                            text: "first".to_owned(),
1578                        },
1579                        AssistantContent::Refusal {
1580                            refusal: "second".to_owned(),
1581                        },
1582                        AssistantContent::Text {
1583                            text: "third".to_owned(),
1584                        },
1585                    ],
1586                    reasoning: Some("hidden".to_owned()),
1587                    refusal: None,
1588                    audio: None,
1589                    name: None,
1590                    tool_calls: vec![],
1591                },
1592                logprobs: None,
1593                finish_reason: "stop".to_owned(),
1594            }],
1595            usage: None,
1596        };
1597
1598        assert_eq!(
1599            response.get_text_response(),
1600            Some("first\nsecond\nthird".to_owned())
1601        );
1602    }
1603
1604    #[test]
1605    fn provider_response_text_response_falls_back_to_assistant_refusal_field() {
1606        let response = CompletionResponse {
1607            id: "resp_123".to_owned(),
1608            object: "chat.completion".to_owned(),
1609            created: 0,
1610            model: GPT_4O.to_owned(),
1611            system_fingerprint: None,
1612            choices: vec![Choice {
1613                index: 0,
1614                message: Message::Assistant {
1615                    content: vec![],
1616                    reasoning: None,
1617                    refusal: Some("blocked".to_owned()),
1618                    audio: None,
1619                    name: None,
1620                    tool_calls: vec![],
1621                },
1622                logprobs: None,
1623                finish_reason: "stop".to_owned(),
1624            }],
1625            usage: None,
1626        };
1627
1628        assert_eq!(response.get_text_response(), Some("blocked".to_owned()));
1629    }
1630
1631    #[test]
1632    fn test_max_tokens_is_forwarded_to_request() {
1633        let request = crate::completion::CompletionRequest {
1634            model: None,
1635            preamble: None,
1636            chat_history: crate::OneOrMany::one("Hello".into()),
1637            documents: vec![],
1638            tools: vec![],
1639            temperature: None,
1640            max_tokens: Some(4096),
1641            tool_choice: None,
1642            additional_params: None,
1643            output_schema: None,
1644        };
1645
1646        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1647            model: "gpt-4o-mini".to_string(),
1648            request,
1649            strict_tools: false,
1650            tool_result_array_content: false,
1651        })
1652        .expect("request conversion should succeed");
1653        let serialized =
1654            serde_json::to_value(openai_request).expect("serialization should succeed");
1655
1656        assert_eq!(serialized["max_tokens"], 4096);
1657    }
1658
1659    #[test]
1660    fn test_max_tokens_omitted_when_none() {
1661        let request = crate::completion::CompletionRequest {
1662            model: None,
1663            preamble: None,
1664            chat_history: crate::OneOrMany::one("Hello".into()),
1665            documents: vec![],
1666            tools: vec![],
1667            temperature: None,
1668            max_tokens: None,
1669            tool_choice: None,
1670            additional_params: None,
1671            output_schema: None,
1672        };
1673
1674        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1675            model: "gpt-4o-mini".to_string(),
1676            request,
1677            strict_tools: false,
1678            tool_result_array_content: false,
1679        })
1680        .expect("request conversion should succeed");
1681        let serialized =
1682            serde_json::to_value(openai_request).expect("serialization should succeed");
1683
1684        assert!(serialized.get("max_tokens").is_none());
1685    }
1686
1687    #[test]
1688    fn request_conversion_errors_when_all_messages_are_filtered() {
1689        let request = CoreCompletionRequest {
1690            model: None,
1691            preamble: None,
1692            chat_history: OneOrMany::one(message::Message::Assistant {
1693                id: None,
1694                content: OneOrMany::one(message::AssistantContent::reasoning("hidden")),
1695            }),
1696            documents: vec![],
1697            tools: vec![],
1698            temperature: None,
1699            max_tokens: None,
1700            tool_choice: None,
1701            additional_params: None,
1702            output_schema: None,
1703        };
1704
1705        let result = CompletionRequest::try_from(OpenAIRequestParams {
1706            model: "gpt-4o-mini".to_string(),
1707            request,
1708            strict_tools: false,
1709            tool_result_array_content: false,
1710        });
1711
1712        assert!(matches!(result, Err(CompletionError::RequestError(_))));
1713    }
1714
1715    #[test]
1716    fn request_conversion_omits_response_format_on_initial_tool_turn() {
1717        let request = CoreCompletionRequest {
1718            model: None,
1719            preamble: None,
1720            chat_history: OneOrMany::one(message::Message::user(
1721                "Hello, whats the weather in London?",
1722            )),
1723            documents: vec![],
1724            tools: vec![completion::ToolDefinition {
1725                name: "weather".to_string(),
1726                description: "Get the weather".to_string(),
1727                parameters: serde_json::json!({
1728                    "type": "object",
1729                    "properties": {
1730                        "city": { "type": "string" }
1731                    },
1732                    "required": ["city"]
1733                }),
1734            }],
1735            temperature: None,
1736            max_tokens: None,
1737            tool_choice: None,
1738            additional_params: None,
1739            output_schema: Some(
1740                serde_json::from_value(serde_json::json!({
1741                    "title": "WeatherResponse",
1742                    "type": "object",
1743                    "properties": {
1744                        "city": { "type": "string" },
1745                        "weather": { "type": "string" }
1746                    },
1747                    "required": ["city", "weather"]
1748                }))
1749                .expect("schema should deserialize"),
1750            ),
1751        };
1752
1753        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1754            model: "gpt-4o-mini".to_string(),
1755            request,
1756            strict_tools: false,
1757            tool_result_array_content: false,
1758        })
1759        .expect("request conversion should succeed");
1760
1761        let serialized =
1762            serde_json::to_value(openai_request).expect("serialization should succeed");
1763
1764        assert!(
1765            serialized.get("response_format").is_none(),
1766            "initial tool turn should omit response_format: {serialized:?}"
1767        );
1768    }
1769
1770    #[test]
1771    fn request_conversion_restores_response_format_after_tool_result() {
1772        let request = CoreCompletionRequest {
1773            model: None,
1774            preamble: None,
1775            chat_history: OneOrMany::many(vec![
1776                message::Message::user("Hello, whats the weather in London?"),
1777                message::Message::Assistant {
1778                    id: None,
1779                    content: OneOrMany::one(message::AssistantContent::tool_call(
1780                        "call_1",
1781                        "weather",
1782                        serde_json::json!({ "city": "London" }),
1783                    )),
1784                },
1785                message::Message::tool_result(
1786                    "call_1",
1787                    "The weather in London is all fire and brimstone",
1788                ),
1789            ])
1790            .expect("history should be non-empty"),
1791            documents: vec![],
1792            tools: vec![completion::ToolDefinition {
1793                name: "weather".to_string(),
1794                description: "Get the weather".to_string(),
1795                parameters: serde_json::json!({
1796                    "type": "object",
1797                    "properties": {
1798                        "city": { "type": "string" }
1799                    },
1800                    "required": ["city"]
1801                }),
1802            }],
1803            temperature: None,
1804            max_tokens: None,
1805            tool_choice: None,
1806            additional_params: None,
1807            output_schema: Some(
1808                serde_json::from_value(serde_json::json!({
1809                    "title": "WeatherResponse",
1810                    "type": "object",
1811                    "properties": {
1812                        "city": { "type": "string" },
1813                        "weather": { "type": "string" }
1814                    },
1815                    "required": ["city", "weather"]
1816                }))
1817                .expect("schema should deserialize"),
1818            ),
1819        };
1820
1821        let openai_request = CompletionRequest::try_from(OpenAIRequestParams {
1822            model: "gpt-4o-mini".to_string(),
1823            request,
1824            strict_tools: false,
1825            tool_result_array_content: false,
1826        })
1827        .expect("request conversion should succeed");
1828
1829        let serialized =
1830            serde_json::to_value(openai_request).expect("serialization should succeed");
1831
1832        assert!(
1833            serialized.get("response_format").is_some(),
1834            "follow-up turn should restore response_format: {serialized:?}"
1835        );
1836    }
1837
1838    #[test]
1839    fn deserialize_llama_cpp_tool_call() {
1840        let request = r#"{
1841            "choices": [{
1842                "finish_reason": "tool_calls",
1843                "index": 0,
1844                "message": {
1845                    "role": "assistant",
1846                    "content": "",
1847                    "tool_calls": [{ "type": "function", "function": { "name": "hello_world", "arguments": { "city": "Paris" } }, "id": "xxx" }]
1848                }
1849            }],
1850            "created": 0,
1851            "model": "gpt-4o-mini",
1852            "system_fingerprint": "fp_xxx",
1853            "object": "chat.completion",
1854            "usage": { "completion_tokens": 13, "prompt_tokens": 255, "total_tokens": 268 },
1855            "id": "xxx"
1856        }
1857        "#;
1858        let response = serde_json::from_str::<ApiResponse<CompletionResponse>>(request).unwrap();
1859
1860        let ApiResponse::Ok(response) = response else {
1861            panic!("expected successful completion response");
1862        };
1863        assert_eq!(response.choices.len(), 1);
1864
1865        let Message::Assistant { tool_calls, .. } = &response.choices[0].message else {
1866            panic!("expected assistant message");
1867        };
1868        assert_eq!(tool_calls.len(), 1);
1869        assert_eq!(tool_calls[0].id, "xxx");
1870        assert_eq!(tool_calls[0].function.name, "hello_world");
1871        assert_eq!(
1872            tool_calls[0].function.arguments,
1873            serde_json::json!({"city": "Paris"})
1874        );
1875    }
1876
1877    #[test]
1878    fn deserialize_openai_stringified_tool_call() {
1879        let request = r#"{
1880            "choices": [{
1881                "finish_reason": "tool_calls",
1882                "index": 0,
1883                "message": {
1884                    "role": "assistant",
1885                    "content": "",
1886                    "tool_calls": [{ "type": "function", "function": { "name": "hello_world", "arguments": "{\"city\":\"Paris\"}" }, "id": "xxx" }]
1887                }
1888            }],
1889            "created": 0,
1890            "model": "gpt-4o-mini",
1891            "system_fingerprint": "fp_xxx",
1892            "object": "chat.completion",
1893            "usage": { "completion_tokens": 13, "prompt_tokens": 255, "total_tokens": 268 },
1894            "id": "xxx"
1895        }
1896        "#;
1897        let response = serde_json::from_str::<ApiResponse<CompletionResponse>>(request).unwrap();
1898
1899        let ApiResponse::Ok(response) = response else {
1900            panic!("expected successful completion response");
1901        };
1902        assert_eq!(response.choices.len(), 1);
1903
1904        let Message::Assistant { tool_calls, .. } = &response.choices[0].message else {
1905            panic!("expected assistant message");
1906        };
1907        assert_eq!(tool_calls.len(), 1);
1908        assert_eq!(tool_calls[0].id, "xxx");
1909        assert_eq!(tool_calls[0].function.name, "hello_world");
1910        assert_eq!(
1911            tool_calls[0].function.arguments,
1912            serde_json::json!({"city": "Paris"})
1913        );
1914    }
1915
1916    #[test]
1917    fn deserialize_llama_cpp_response_with_reasoning_content() {
1918        let request = r#"
1919        {
1920            "choices": [
1921                {
1922                    "finish_reason": "stop",
1923                    "index": 0,
1924                    "message": {
1925                        "role": "assistant",
1926                        "content": "",
1927                        "reasoning_content": "Now I understand the structure better. I need to: ..."
1928                    }
1929                }
1930            ],
1931            "created": 1776750378,
1932            "model": "unsloth/Qwen3.6-35B-A3B-GGUF:Q8_0",
1933            "system_fingerprint": "fp_xxx",
1934            "object": "chat.completion",
1935            "usage": {
1936                "completion_tokens": 920,
1937                "prompt_tokens": 27806,
1938                "total_tokens": 28726,
1939                "prompt_tokens_details": { "cached_tokens": 18698 }
1940            },
1941            "id": "chatcmpl-xxxx",
1942            "timings": {
1943                "cache_n": 18698,
1944                "prompt_n": 9108,
1945                "prompt_ms": 226645.81,
1946                "prompt_per_token_ms": 24.884256697408873,
1947                "prompt_per_second": 40.186050648807495,
1948                "predicted_n": 920,
1949                "predicted_ms": 177167.955,
1950                "predicted_per_token_ms": 192.57386413043477,
1951                "predicted_per_second": 5.192812661860888
1952            }
1953        }
1954        "#;
1955        let response = serde_json::from_str::<ApiResponse<CompletionResponse>>(request).unwrap();
1956        let ApiResponse::Ok(response) = response else {
1957            panic!("expected successful completion response");
1958        };
1959
1960        let response: completion::CompletionResponse<CompletionResponse> =
1961            response.try_into().unwrap();
1962
1963        assert_eq!(response.choice.len(), 1);
1964
1965        let completion::message::AssistantContent::Reasoning(reasoning) = response.choice.first()
1966        else {
1967            panic!("expected assistant content to be reasoning");
1968        };
1969        assert_eq!(
1970            reasoning.first_text(),
1971            Some("Now I understand the structure better. I need to: ...")
1972        );
1973    }
1974}