openai_interface/chat/create/response.rs

pub mod streaming {
    use std::str::FromStr;

    use serde::Deserialize;

    use crate::{chat::ServiceTier, errors::OapiError};

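    /// One parsed chunk of a streaming chat completion.
    ///
    /// A minimal sketch of recovering a chunk from a single server-sent event
    /// line. The `data: ` prefix and the `[DONE]` sentinel are conventions of
    /// the OpenAI-compatible streaming transport, and `parse_event` is an
    /// illustrative helper, not part of this crate:
    ///
    /// ```ignore
    /// fn parse_event(line: &str) -> Option<Result<ChatCompletionChunk, OapiError>> {
    ///     let payload = line.strip_prefix("data: ")?.trim();
    ///     if payload == "[DONE]" {
    ///         return None; // end-of-stream sentinel, not a JSON chunk
    ///     }
    ///     Some(payload.parse::<ChatCompletionChunk>())
    /// }
    /// ```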
    #[derive(Debug, Deserialize, Clone)]
    pub struct ChatCompletionChunk {
        /// A unique identifier for the chat completion.
        pub id: String,
        /// A list of chat completion choices. Can contain more than one element
        /// if `n` is greater than 1. Can also be empty for the last chunk if you set
        /// `stream_options: {"include_usage": true}`.
        pub choices: Vec<CompletionChunkChoice>,
        /// The Unix timestamp (in seconds) of when the chat completion was created.
        /// Each chunk has the same timestamp.
        pub created: u64,
        /// The model used for the chat completion.
        pub model: String,
        /// The object type, which is always `chat.completion.chunk`.
        pub object: ChatCompletionChunkObject,
        /// The service tier used for processing the request.
        pub service_tier: Option<ServiceTier>,
        /// This fingerprint represents the backend configuration that the model runs with.
        /// Can be used in conjunction with the `seed` request parameter to understand when
        /// backend changes have been made that might impact determinism.
        pub system_fingerprint: Option<String>,
        /// An optional field that will only be present when you set
        /// `stream_options: {"include_usage": true}` in your request. When present, it
        /// contains a null value **except for the last chunk**, which contains the token
        /// usage statistics for the entire request.
        ///
        /// **NOTE:** If the stream is interrupted or cancelled, you may not receive the
        /// final usage chunk, which contains the total token usage for the request.
        pub usage: Option<CompletionUsage>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub enum ChatCompletionChunkObject {
        #[serde(rename = "chat.completion.chunk")]
        ChatCompletionChunk,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionChunkChoice {
        /// A chat completion delta generated by streamed model responses.
        pub delta: ChoiceDelta,
        /// The index of the choice in the list of choices.
        pub index: u32,
        /// Log probability information for the choice.
        pub logprobs: Option<ChoiceLogprobs>,
        /// The reason the model stopped generating tokens.
        ///
        /// This will be `stop` if the model hit a natural stop point or a provided stop
        /// sequence, `length` if the maximum number of tokens specified in the request was
        /// reached, `content_filter` if content was omitted due to a flag from our content
        /// filters, `tool_calls` if the model called a tool, or `function_call`
        /// (deprecated) if the model called a function.
        pub finish_reason: Option<FinishReason>,
    }

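    /// Why a choice stopped streaming.
    ///
    /// A hedged sketch of dispatching on the final chunk's `finish_reason`
    /// (the handler names are illustrative, not part of this crate):
    ///
    /// ```ignore
    /// match choice.finish_reason {
    ///     Some(FinishReason::Stop) => finalize_message(),
    ///     Some(FinishReason::Length) => warn_truncated(),
    ///     Some(FinishReason::ToolCalls) => run_tool_calls(),
    ///     Some(other) => handle_other(other),
    ///     None => {} // mid-stream chunk; keep reading
    /// }
    /// ```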
    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum FinishReason {
        /// The maximum number of tokens specified in the request was reached.
        Length,
        /// The model hit a natural stop point or a provided stop sequence.
        Stop,
        /// Content was omitted due to a flag from our content filters.
        ContentFilter,
        /// The model called a function (deprecated).
        FunctionCall,
        /// The model called a tool.
        ToolCalls,
        /// DeepSeek-specific: the request was interrupted due to insufficient
        /// system resources. This variant appears only in DeepSeek's API manual.
        InsufficientSystemResource,
    }

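    /// The incremental part of one choice in a streamed response.
    ///
    /// Each field carries only the piece generated since the previous chunk;
    /// clients typically rebuild the full message by concatenating deltas. A
    /// hedged sketch for plain text content (`chunks` is assumed to be an
    /// iterator of already-parsed `ChatCompletionChunk`s):
    ///
    /// ```ignore
    /// let mut full_text = String::new();
    /// for chunk in chunks {
    ///     for choice in &chunk.choices {
    ///         if let Some(CompletionContent::Content(part)) = &choice.delta.content {
    ///             full_text.push_str(part);
    ///         }
    ///     }
    /// }
    /// ```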
    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDelta {
        /// The contents of the chunk message.
        #[serde(flatten)]
        pub content: Option<CompletionContent>,
        /// Deprecated and replaced by `tool_calls`.
        ///
        /// The name and arguments of a function that should be called, as generated by the
        /// model.
        pub function_call: Option<ChoiceDeltaFunctionCall>,
        /// The refusal message generated by the model.
        pub refusal: Option<String>,
        /// The role of the author of this message.
        pub role: Option<CompletionRole>,
        /// A list of tool calls generated by the model, such as function calls.
        pub tool_calls: Option<Vec<ChoiceDeltaToolCall>>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCallFunction {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaFunctionCall {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

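    /// A fragment of a tool call in a streamed response.
    ///
    /// Tool calls arrive split across chunks: the first fragment for a given
    /// `index` usually carries the `id`, `type_`, and function `name`, while
    /// later fragments append pieces of the JSON `arguments` string. A hedged
    /// sketch of reassembling calls keyed by `index` (`tool_call_deltas` is an
    /// assumed iterator of `ChoiceDeltaToolCall` values):
    ///
    /// ```ignore
    /// use std::collections::BTreeMap;
    ///
    /// // index -> (function name, accumulated JSON arguments)
    /// let mut calls: BTreeMap<usize, (String, String)> = BTreeMap::new();
    /// for call in tool_call_deltas {
    ///     let entry = calls.entry(call.index).or_default();
    ///     if let Some(f) = &call.function {
    ///         if let Some(name) = &f.name {
    ///             entry.0.push_str(name);
    ///         }
    ///         if let Some(args) = &f.arguments {
    ///             entry.1.push_str(args);
    ///         }
    ///     }
    /// }
    /// // Each accumulated `arguments` string should then be validated as JSON
    /// // before the function is actually invoked.
    /// ```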
    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCall {
        /// The index of the tool call in the list of tool calls.
        pub index: usize,
        /// The ID of the tool call.
        pub id: Option<String>,
        /// The function that the model called.
        pub function: Option<ChoiceDeltaToolCallFunction>,
        /// The type of the tool. Currently, only `function` is supported.
        #[serde(rename = "type")]
        pub type_: Option<ChoiceDeltaToolCallType>,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceDeltaToolCallType {
        Function,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionRole {
        Assistant,
        Developer,
        System,
        Tool,
        User,
    }

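    /// The text payload of a delta, distinguished by its JSON key.
    ///
    /// `ChoiceDelta` flattens this enum, so serde selects the variant from the
    /// key present in the delta object: `"content"` deserializes to `Content`,
    /// and `"reasoning_content"` (emitted by deepseek-reasoner) deserializes to
    /// `ReasoningContent`. A hedged sketch of splitting the two streams
    /// (`answer` and `thoughts` are assumed `String` buffers):
    ///
    /// ```ignore
    /// match delta.content {
    ///     Some(CompletionContent::Content(text)) => answer.push_str(&text),
    ///     Some(CompletionContent::ReasoningContent(text)) => thoughts.push_str(&text),
    ///     None => {}
    /// }
    /// ```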
    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionContent {
        Content(String),
        /// For the deepseek-reasoner model only.
        ReasoningContent(String),
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceLogprobs {
        Content(Vec<LogprobeContent>),
        /// For the deepseek-reasoner model only.
        ReasoningContent(Vec<LogprobeContent>),
    }

    /// A message content token with log probability information.
    #[derive(Debug, Deserialize, Clone)]
    pub struct LogprobeContent {
        pub token: String,
        pub logprob: f32,
        pub bytes: Option<Vec<u8>>,
        pub top_logprobs: Vec<TopLogprob>,
    }

    /// One of the most likely tokens and its log probability at this token
    /// position. In rare cases, fewer than the requested number of
    /// `top_logprobs` may be returned.
    #[derive(Debug, Deserialize, Clone)]
    pub struct TopLogprob {
        pub token: String,
        pub logprob: f32,
        pub bytes: Option<Vec<u8>>,
    }

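    /// Token accounting for the entire request, delivered on the final chunk
    /// when usage reporting is enabled.
    ///
    /// When DeepSeek's cache counters are present, the context-cache hit rate
    /// can be derived from them; a hedged sketch:
    ///
    /// ```ignore
    /// if let (Some(hit), Some(miss)) =
    ///     (usage.prompt_cache_hit_tokens, usage.prompt_cache_miss_tokens)
    /// {
    ///     if hit + miss > 0 {
    ///         let hit_rate = hit as f64 / (hit + miss) as f64;
    ///         println!("context cache hit rate: {:.1}%", hit_rate * 100.0);
    ///     }
    /// }
    /// ```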
    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionUsage {
        /// Number of tokens in the generated completion.
        pub completion_tokens: usize,
        /// Number of tokens in the prompt.
        pub prompt_tokens: usize,

        // These two fields appear to be DeepSeek-specific.
        /// Number of tokens in the prompt that hit the context cache.
        pub prompt_cache_hit_tokens: Option<usize>,
        /// Number of tokens in the prompt that missed the context cache.
        pub prompt_cache_miss_tokens: Option<usize>,

        /// Total number of tokens used in the request (prompt + completion).
        pub total_tokens: usize,
        /// Breakdown of tokens used in a completion.
        pub completion_tokens_details: Option<CompletionTokensDetails>,
        /// Breakdown of tokens used in the prompt.
        pub prompt_tokens_details: Option<PromptTokensDetails>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionTokensDetails {
        /// When using Predicted Outputs, the number of tokens in the prediction that
        /// appeared in the completion.
        pub accepted_prediction_tokens: Option<usize>,
        /// Audio input tokens generated by the model.
        pub audio_tokens: Option<usize>,
        /// Tokens generated by the model for reasoning.
        pub reasoning_tokens: Option<usize>,
        /// When using Predicted Outputs, the number of tokens in the prediction that did
        /// not appear in the completion. However, like reasoning tokens, these tokens are
        /// still counted in the total completion tokens for purposes of billing, output,
        /// and context window limits.
        pub rejected_prediction_tokens: Option<usize>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct PromptTokensDetails {
        /// Audio input tokens present in the prompt.
        pub audio_tokens: Option<usize>,
        /// Cached tokens present in the prompt.
        pub cached_tokens: Option<usize>,
    }

    impl FromStr for ChatCompletionChunk {
        type Err = OapiError;

        fn from_str(content: &str) -> Result<Self, Self::Err> {
            serde_json::from_str(content)
                .map_err(|e| OapiError::DeserializationError(e.to_string()))
        }
    }

    #[cfg(test)]
    mod test {
        use super::*;

        /// Asserts that every payload in `streams` deserializes as a
        /// `ChatCompletionChunk`, printing each parsed value.
        fn assert_all_parse(streams: &[&str]) {
            for stream in streams {
                match ChatCompletionChunk::from_str(stream) {
                    Ok(completion) => println!("Deserialized: {:#?}", completion),
                    Err(e) => panic!("Failed to deserialize {}: {}", stream, e),
                }
            }
        }

        #[test]
        fn streaming_example_deepseek() {
            let streams = vec![
                r#"{"id": "1f633d8bfc032625086f14113c411638", "choices": [{"index": 0, "delta": {"content": "", "role": "assistant"}, "finish_reason": null, "logprobs": null}], "created": 1718345013, "model": "deepseek-chat", "system_fingerprint": "fp_a49d71b8a1", "object": "chat.completion.chunk", "usage": null}"#,
                r#"{"choices": [{"delta": {"content": "Hello", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "!", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " How", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " can", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " I", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " assist", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " you", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " today", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "?", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "", "role": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1", "usage": {"completion_tokens": 9, "prompt_tokens": 17, "total_tokens": 26}}"#,
            ];

            assert_all_parse(&streams);
        }

        #[test]
        fn streaming_example_qwen() {
            let streams = vec![
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":"assistant","tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"我是","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"来自","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"阿里","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"云的超大规模","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"语言模型,我","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"叫通义千","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"问。","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":{"completion_tokens":17,"prompt_tokens":22,"total_tokens":39,"completion_tokens_details":null,"prompt_tokens_details":{"audio_tokens":null,"cached_tokens":0}}}"#,
            ];

            assert_all_parse(&streams);
        }
    }
}

/// In the non-streaming case the response is a complete chat completion, so
/// this module simply re-exports the shared type.
pub mod no_streaming {
    pub type ChatCompletion = crate::chat::ChatCompletion;
}