// openai_interface/chat/create/response.rs

//! This module provides structures for streaming and non-streaming
//! chat completion responses.

pub mod streaming {
    //! Streaming chat completion response.

    use std::str::FromStr;

    use serde::Deserialize;

    use crate::{chat::ServiceTier, errors::OapiError};

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChatCompletionChunk {
        /// A unique identifier for the chat completion.
        pub id: String,
        /// A list of chat completion choices. Can contain more than one element
        /// if `n` is greater than 1. Can also be empty for the last chunk if you set
        /// `stream_options: {"include_usage": true}`.
        pub choices: Vec<CompletionChunkChoice>,
        /// The Unix timestamp (in seconds) of when the chat completion was created.
        /// Each chunk has the same timestamp.
        pub created: u64,
        /// The model used for the chat completion.
        pub model: String,
        /// The object type, which is always `chat.completion.chunk`.
        pub object: ChatCompletionChunkObject,
        /// The service tier used for processing the request.
        pub service_tier: Option<ServiceTier>,
        /// This fingerprint represents the backend configuration that the model runs with.
        /// Can be used in conjunction with the `seed` request parameter to understand when
        /// backend changes have been made that might impact determinism.
        pub system_fingerprint: Option<String>,
        /// An optional field that will only be present when you set
        /// `stream_options: {"include_usage": true}` in your request. When present, it
        /// contains a null value **except for the last chunk**, which contains the token
        /// usage statistics for the entire request.
        ///
        /// **NOTE:** If the stream is interrupted or cancelled, you may not receive the
        /// final usage chunk, which contains the total token usage for the request.
        pub usage: Option<CompletionUsage>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub enum ChatCompletionChunkObject {
        #[serde(rename = "chat.completion.chunk")]
        ChatCompletionChunk,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionChunkChoice {
        /// A chat completion delta generated by streamed model responses.
        pub delta: ChoiceDelta,
        /// The index of the choice in the list of choices.
        pub index: u32,
        /// Log probability information for the choice.
        pub logprobs: Option<ChoiceLogprobs>,
        /// The reason the model stopped generating tokens.
        ///
        /// This will be `stop` if the model hit a natural stop point or a provided stop
        /// sequence, `length` if the maximum number of tokens specified in the request was
        /// reached, `content_filter` if content was omitted due to a flag from our content
        /// filters, `tool_calls` if the model called a tool, or `function_call`
        /// (deprecated) if the model called a function.
        pub finish_reason: Option<FinishReason>,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum FinishReason {
        /// The maximum number of tokens specified in the request was reached.
        Length,
        /// The model hit a natural stop point or a provided stop sequence.
        Stop,
        /// Content was omitted due to a flag from our content filters.
        ContentFilter,
        /// The model called a function (deprecated).
        FunctionCall,
        /// DeepSeek-specific: the request was interrupted due to insufficient
        /// system resources. This variant is documented only by DeepSeek.
        InsufficientSystemResource,
        /// The model called a tool.
        ToolCalls,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDelta {
        /// The contents of the chunk message.
        #[serde(flatten)]
        pub content: Option<CompletionContent>,
        /// Deprecated and replaced by `tool_calls`.
        ///
        /// The name and arguments of a function that should be called, as generated by the
        /// model.
        pub function_call: Option<ChoiceDeltaFunctionCall>,
        /// The refusal message generated by the model.
        pub refusal: Option<String>,
        /// The role of the author of this message.
        pub role: Option<CompletionRole>,
        /// A list of tool calls generated by the model, such as function calls.
        pub tool_calls: Option<Vec<ChoiceDeltaToolCall>>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCallFunction {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaFunctionCall {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCall {
        /// The index of the tool call in the list of tool calls.
        ///
        /// In a stream, a single logical tool call is split across several deltas
        /// that share the same `index`; consumers must concatenate the `arguments`
        /// fragments themselves (see `tool_call_delta_accumulation_sketch` in the
        /// tests below).
        pub index: usize,
        /// The ID of the tool call.
        pub id: Option<String>,
        /// The function that the model called.
        pub function: Option<ChoiceDeltaToolCallFunction>,
        /// The type of the tool. Currently, only `function` is supported.
        #[serde(rename = "type")]
        pub type_: Option<ChoiceDeltaToolCallType>,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceDeltaToolCallType {
        Function,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionRole {
        Assistant,
        Developer,
        System,
        Tool,
        User,
    }

    /// The message content of a chunk. This enum is flattened into [`ChoiceDelta`],
    /// so the variant key (`content` or `reasoning_content`) appears directly in the
    /// delta object (see `streaming_reasoning_content_sketch` in the tests below).
    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionContent {
        Content(String),
        /// For the deepseek-reasoner model only.
        ReasoningContent(String),
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceLogprobs {
        Content(Vec<LogprobeContent>),
        /// For the deepseek-reasoner model only.
        ReasoningContent(Vec<LogprobeContent>),
    }

    /// A message content token with log probability information.
    #[derive(Debug, Deserialize, Clone)]
    pub struct LogprobeContent {
        /// The token.
        pub token: String,
        /// The log probability of this token.
        pub logprob: f32,
        /// The UTF-8 bytes of the token, if applicable.
        pub bytes: Option<Vec<u8>>,
        /// The most likely tokens and their log probabilities at this position.
        pub top_logprobs: Vec<TopLogprob>,
    }

    /// One of the most likely tokens and its log probability, at this token
    /// position. In rare cases, fewer than the requested number of `top_logprobs`
    /// may be returned.
    #[derive(Debug, Deserialize, Clone)]
    pub struct TopLogprob {
        /// The token.
        pub token: String,
        /// The log probability of this token.
        pub logprob: f32,
        /// The UTF-8 bytes of the token, if applicable.
        pub bytes: Option<Vec<u8>>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionUsage {
        /// Number of tokens in the generated completion.
        pub completion_tokens: usize,
        /// Number of tokens in the prompt.
        pub prompt_tokens: usize,

        // These two fields appear to be DeepSeek-specific.
        /// Number of prompt tokens that hit the context cache.
        pub prompt_cache_hit_tokens: Option<usize>,
        /// Number of prompt tokens that missed the context cache. Together with
        /// `prompt_cache_hit_tokens`, this should sum to `prompt_tokens`.
        pub prompt_cache_miss_tokens: Option<usize>,

        /// Total number of tokens used in the request (prompt + completion).
        pub total_tokens: usize,
        /// Breakdown of tokens used in a completion.
        pub completion_tokens_details: Option<CompletionTokensDetails>,
        /// Breakdown of tokens used in the prompt.
        pub prompt_tokens_details: Option<PromptTokensDetails>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionTokensDetails {
        /// When using Predicted Outputs, the number of tokens in the prediction that
        /// appeared in the completion.
        pub accepted_prediction_tokens: Option<usize>,
        /// Audio input tokens generated by the model.
        pub audio_tokens: Option<usize>,
        /// Tokens generated by the model for reasoning.
        pub reasoning_tokens: Option<usize>,
        /// When using Predicted Outputs, the number of tokens in the prediction that did
        /// not appear in the completion. However, like reasoning tokens, these tokens are
        /// still counted in the total completion tokens for purposes of billing, output,
        /// and context window limits.
        pub rejected_prediction_tokens: Option<usize>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct PromptTokensDetails {
        /// Audio input tokens present in the prompt.
        pub audio_tokens: Option<usize>,
        /// Cached tokens present in the prompt.
        pub cached_tokens: Option<usize>,
    }

    impl FromStr for ChatCompletionChunk {
        type Err = OapiError;

        fn from_str(content: &str) -> Result<Self, Self::Err> {
            serde_json::from_str(content)
                .map_err(|e| OapiError::DeserializationError(e.to_string()))
        }
    }

    #[cfg(test)]
    mod test {
        use super::*;

        #[test]
        fn streaming_example_deepseek() {
            let streams = vec![
                r#"{"id": "1f633d8bfc032625086f14113c411638", "choices": [{"index": 0, "delta": {"content": "", "role": "assistant"}, "finish_reason": null, "logprobs": null}], "created": 1718345013, "model": "deepseek-chat", "system_fingerprint": "fp_a49d71b8a1", "object": "chat.completion.chunk", "usage": null}"#,
                r#"{"choices": [{"delta": {"content": "Hello", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "!", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " How", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " can", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " I", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " assist", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " you", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " today", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "?", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "", "role": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1", "usage": {"completion_tokens": 9, "prompt_tokens": 17, "total_tokens": 26}}"#,
            ];

            for stream in streams {
                let parsed = ChatCompletionChunk::from_str(stream);
                match parsed {
                    Ok(completion) => {
                        println!("Deserialized: {:#?}", completion);
                    }
                    Err(e) => {
                        panic!("Failed to deserialize {}: {}", stream, e);
                    }
                }
            }
        }
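
        /// A minimal sketch (not part of this crate's API) of accumulating
        /// streamed tool-call deltas into one complete call. The delta shapes
        /// below are hand-written assumptions modeled on how OpenAI-style
        /// streams split a tool call across chunks: the first delta carries
        /// `id` and `name`, and later deltas carry `arguments` fragments.
        #[test]
        fn tool_call_delta_accumulation_sketch() {
            let deltas = vec![
                ChoiceDeltaToolCall {
                    index: 0,
                    id: Some("call_abc123".to_string()),
                    function: Some(ChoiceDeltaToolCallFunction {
                        arguments: Some("{\"location\":".to_string()),
                        name: Some("get_weather".to_string()),
                    }),
                    type_: Some(ChoiceDeltaToolCallType::Function),
                },
                ChoiceDeltaToolCall {
                    index: 0,
                    id: None,
                    function: Some(ChoiceDeltaToolCallFunction {
                        arguments: Some("\"Paris\"}".to_string()),
                        name: None,
                    }),
                    type_: None,
                },
            ];

            // Concatenate the fragments that share `index` 0.
            let mut name = String::new();
            let mut arguments = String::new();
            for delta in deltas.iter().filter(|d| d.index == 0) {
                if let Some(function) = &delta.function {
                    if let Some(n) = &function.name {
                        name.push_str(n);
                    }
                    if let Some(a) = &function.arguments {
                        arguments.push_str(a);
                    }
                }
            }

            assert_eq!(name, "get_weather");
            assert_eq!(arguments, "{\"location\":\"Paris\"}");
        }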

        #[test]
        fn streaming_example_qwen() {
            let streams = vec![
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":"assistant","tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"我是","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"来自","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"阿里","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"云的超大规模","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"语言模型,我","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"叫通义千","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"问。","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":{"completion_tokens":17,"prompt_tokens":22,"total_tokens":39,"completion_tokens_details":null,"prompt_tokens_details":{"audio_tokens":null,"cached_tokens":0}}}"#,
            ];

            for stream in streams {
                let parsed = ChatCompletionChunk::from_str(stream);
                match parsed {
                    Ok(completion) => {
                        println!("Deserialized: {:#?}", completion);
                    }
                    Err(e) => {
                        panic!("Failed to deserialize {}: {}", stream, e);
                    }
                }
            }
        }
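
        /// A sketch built on a hand-written chunk (not captured from a real
        /// API response) showing how deepseek-reasoner's `reasoning_content`
        /// deserializes through the flattened `CompletionContent` enum. Note
        /// that the delta carries only the `reasoning_content` key: because
        /// `CompletionContent` is an externally tagged enum behind
        /// `#[serde(flatten)]`, exactly one leftover key must remain after
        /// the named fields are consumed.
        #[test]
        fn streaming_reasoning_content_sketch() {
            let chunk = r#"{"id":"chunk-1","choices":[{"delta":{"reasoning_content":"Let me think.","role":"assistant"},"finish_reason":null,"index":0,"logprobs":null}],"created":1718345013,"model":"deepseek-reasoner","object":"chat.completion.chunk","system_fingerprint":null,"usage":null}"#;

            let parsed = ChatCompletionChunk::from_str(chunk).expect("chunk should parse");
            match &parsed.choices[0].delta.content {
                Some(CompletionContent::ReasoningContent(text)) => {
                    assert_eq!(text, "Let me think.");
                }
                other => panic!("expected reasoning_content, got {:?}", other),
            }
        }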
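
        /// The error path of `from_str`: a truncated payload, as might be
        /// left behind by an interrupted stream, surfaces as a
        /// deserialization error rather than a panic. The payload here is
        /// deliberately malformed.
        #[test]
        fn truncated_chunk_is_a_deserialization_error() {
            let truncated = r#"{"id": "1f633d8bfc032625086f14113c411638", "choices": [{"index": 0"#;
            let err = ChatCompletionChunk::from_str(truncated)
                .expect_err("truncated JSON must not parse");
            assert!(matches!(err, OapiError::DeserializationError(_)));
        }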
    }
}

pub mod no_streaming {
    //! Non-streaming chat completion response.

    /// Alias for [`crate::chat::ChatCompletion`], which is shared by many
    /// other modules. The alias exists for naming compatibility with the
    /// streaming module.
    pub type ChatCompletion = crate::chat::ChatCompletion;
}