openai_interface/chat/create/response.rs

pub mod streaming {
    use std::str::FromStr;

    use serde::Deserialize;

    use crate::{chat::ServiceTier, errors::OapiError};

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChatCompletionChunk {
        /// A unique identifier for the chat completion.
        pub id: String,
        /// A list of chat completion choices. Can be more than one
        /// if `n` is greater than 1. Can also be empty for the last chunk if you set
        /// `stream_options: {"include_usage": true}`.
        pub choices: Vec<CompletionChunkChoice>,
        /// The Unix timestamp (in seconds) of when the chat completion was created.
        /// Each chunk has the same timestamp.
        pub created: u64,
        /// The model used for the chat completion.
        pub model: String,
        /// The object type, which is always `chat.completion.chunk`.
        pub object: ChatCompletionChunkObject,
        /// Specifies the processing type used for serving the request. When present,
        /// this reflects the service tier actually used to process the request, which
        /// may differ from the value set in the request parameter.
        pub service_tier: Option<ServiceTier>,
        /// This fingerprint represents the backend configuration that the model runs with.
        /// Can be used in conjunction with the `seed` request parameter to understand when
        /// backend changes have been made that might impact determinism.
        pub system_fingerprint: Option<String>,
        /// An optional field that will only be present when you set
        /// `stream_options: {"include_usage": true}` in your request. When present, it
        /// contains a null value **except for the last chunk** which contains the token
        /// usage statistics for the entire request.
        ///
        /// **NOTE:** If the stream is interrupted or cancelled, you may not receive the
        /// final usage chunk which contains the total token usage for the request.
        pub usage: Option<CompletionUsage>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub enum ChatCompletionChunkObject {
        #[serde(rename = "chat.completion.chunk")]
        ChatCompletionChunk,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionChunkChoice {
        /// A chat completion delta generated by streamed model responses.
        pub delta: ChoiceDelta,
        /// The index of the choice in the list of choices.
        pub index: u32,
        /// Log probability information for the choice.
        pub logprobs: Option<ChoiceLogprobs>,
        /// The reason the model stopped generating tokens.
        ///
        /// This will be `stop` if the model hit a natural stop point or a provided stop
        /// sequence, `length` if the maximum number of tokens specified in the request was
        /// reached, `content_filter` if content was omitted due to a flag from our content
        /// filters, `tool_calls` if the model called a tool, or `function_call`
        /// (deprecated) if the model called a function.
        pub finish_reason: Option<FinishReason>,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum FinishReason {
        /// The maximum number of tokens specified in the request was reached.
        Length,
        /// The model hit a natural stop point or a provided stop sequence.
        Stop,
        /// Content was omitted due to a flag from our content filters.
        ContentFilter,
        /// The model called a function (deprecated).
        FunctionCall,
        /// The model called a tool.
        ToolCalls,
        /// Documented only in DeepSeek's API manual: generation was interrupted
        /// because system resources were insufficient.
        InsufficientSystemResource,
    }
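
    // Hedged sketch, not part of the upstream schema: one way a consumer might
    // classify terminal states while draining a stream. A `None` finish reason
    // means more deltas are expected for this choice; treating
    // `InsufficientSystemResource` like `Length` (output cut short) is an
    // assumption based on DeepSeek's description of an interrupted generation.
    #[allow(dead_code)]
    fn output_was_cut_short(reason: &FinishReason) -> bool {
        matches!(
            reason,
            FinishReason::Length | FinishReason::InsufficientSystemResource
        )
    }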

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDelta {
        /// The contents of the chunk message.
        #[serde(flatten)]
        pub content: Option<CompletionContent>,
        /// Deprecated and replaced by `tool_calls`.
        ///
        /// The name and arguments of a function that should be called, as generated by the
        /// model.
        pub function_call: Option<ChoiceDeltaFunctionCall>,
        /// The refusal message generated by the model.
        pub refusal: Option<String>,
        /// The role of the author of this message.
        pub role: Option<CompletionRole>,
        /// A list of tool calls generated by the model, such as function calls.
        pub tool_calls: Option<Vec<ChoiceDeltaToolCall>>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCallFunction {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaFunctionCall {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: Option<String>,
        /// The name of the function to call.
        pub name: Option<String>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct ChoiceDeltaToolCall {
        /// The index of the tool call in the list of tool calls.
        pub index: usize,
        /// The ID of the tool call.
        pub id: Option<String>,
        /// The function that the model called.
        pub function: Option<ChoiceDeltaToolCallFunction>,
        /// The type of the tool. Currently, only `function` is supported.
        #[serde(rename = "type")]
        pub type_: Option<ChoiceDeltaToolCallType>,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceDeltaToolCallType {
        Function,
    }
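
    // Hedged sketch, not part of the original module: streamed tool calls
    // arrive as fragments keyed by `index`. In practice the `id` and function
    // `name` come with the first fragment and later fragments extend the
    // `arguments` JSON string; that accumulation convention is an assumption
    // about OpenAI-style streams, not something this file guarantees.
    #[allow(dead_code)]
    fn merge_tool_call_deltas(
        deltas: &[ChoiceDeltaToolCall],
    ) -> std::collections::BTreeMap<usize, (String, String)> {
        // Map of tool-call index -> (function name, concatenated arguments).
        let mut merged = std::collections::BTreeMap::<usize, (String, String)>::new();
        for delta in deltas {
            let entry = merged.entry(delta.index).or_default();
            if let Some(function) = &delta.function {
                if let Some(name) = &function.name {
                    entry.0.push_str(name);
                }
                if let Some(arguments) = &function.arguments {
                    entry.1.push_str(arguments);
                }
            }
        }
        merged
    }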

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionRole {
        Assistant,
        Developer,
        System,
        Tool,
        User,
    }

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum CompletionContent {
        Content(String),
        /// For the deepseek-reasoner model only.
        ReasoningContent(String),
    }
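
    // Hedged sketch, not in the original file: because `ChoiceDelta.content`
    // is a flattened `CompletionContent`, each delta carries either ordinary
    // `content` or, for deepseek-reasoner, `reasoning_content`. This shows one
    // way to fold a sequence of parsed chunks into the two text streams.
    #[allow(dead_code)]
    fn fold_chunks(chunks: &[ChatCompletionChunk]) -> (String, String) {
        let mut content = String::new();
        let mut reasoning = String::new();
        for chunk in chunks {
            for choice in &chunk.choices {
                match &choice.delta.content {
                    Some(CompletionContent::Content(text)) => content.push_str(text),
                    Some(CompletionContent::ReasoningContent(text)) => reasoning.push_str(text),
                    None => {}
                }
            }
        }
        (content, reasoning)
    }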

    #[derive(Debug, Deserialize, Clone)]
    #[serde(rename_all = "snake_case")]
    pub enum ChoiceLogprobs {
        Content(Vec<LogprobeContent>),
        /// For the deepseek-reasoner model only.
        ReasoningContent(Vec<LogprobeContent>),
    }

    /// A message content token with its log probability information.
    #[derive(Debug, Deserialize, Clone)]
    pub struct LogprobeContent {
        /// The token.
        pub token: String,
        /// The log probability of this token.
        pub logprob: f32,
        /// The UTF-8 bytes representation of the token, if any.
        pub bytes: Option<Vec<u8>>,
        /// The most likely tokens and their log probability at this token position.
        pub top_logprobs: Vec<TopLogprob>,
    }

    /// One of the most likely tokens at this token position, with its log
    /// probability. In rare cases, fewer than the requested number of
    /// `top_logprobs` may be returned.
    #[derive(Debug, Deserialize, Clone)]
    pub struct TopLogprob {
        /// The token.
        pub token: String,
        /// The log probability of this token.
        pub logprob: f32,
        /// The UTF-8 bytes representation of the token, if any.
        pub bytes: Option<Vec<u8>>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionUsage {
        /// Number of tokens in the generated completion.
        pub completion_tokens: usize,
        /// Number of tokens in the prompt.
        pub prompt_tokens: usize,

        // These two fields seem to be DeepSeek specific.
        /// Number of tokens in the prompt that hit the context cache.
        pub prompt_cache_hit_tokens: Option<usize>,
        /// Number of tokens in the prompt that miss the context cache.
        pub prompt_cache_miss_tokens: Option<usize>,

        /// Total number of tokens used in the request (prompt + completion).
        pub total_tokens: usize,
        /// Breakdown of tokens used in a completion.
        pub completion_tokens_details: Option<CompletionTokensDetails>,
        /// Breakdown of tokens used in the prompt.
        pub prompt_tokens_details: Option<PromptTokensDetails>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct CompletionTokensDetails {
        /// When using Predicted Outputs, the number of tokens in the prediction that
        /// appeared in the completion.
        pub accepted_prediction_tokens: Option<usize>,
        /// Audio input tokens generated by the model.
        pub audio_tokens: Option<usize>,
        /// Tokens generated by the model for reasoning.
        pub reasoning_tokens: Option<usize>,
        /// When using Predicted Outputs, the number of tokens in the prediction that did
        /// not appear in the completion. However, like reasoning tokens, these tokens are
        /// still counted in the total completion tokens for purposes of billing, output,
        /// and context window limits.
        pub rejected_prediction_tokens: Option<usize>,
    }

    #[derive(Debug, Deserialize, Clone)]
    pub struct PromptTokensDetails {
        /// Audio input tokens present in the prompt.
        pub audio_tokens: Option<usize>,
        /// Cached tokens present in the prompt.
        pub cached_tokens: Option<usize>,
    }
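
    // Hedged sketch: when DeepSeek's cache counters are present they should
    // partition the prompt (hit + miss == prompt_tokens, as in the test data
    // below), so a caller can derive a cache hit rate. That invariant is an
    // assumption drawn from the field descriptions, not enforced here.
    #[allow(dead_code)]
    fn prompt_cache_hit_rate(usage: &CompletionUsage) -> Option<f64> {
        let hits = usage.prompt_cache_hit_tokens?;
        if usage.prompt_tokens == 0 {
            return None;
        }
        Some(hits as f64 / usage.prompt_tokens as f64)
    }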

    impl FromStr for ChatCompletionChunk {
        type Err = crate::errors::OapiError;

        fn from_str(content: &str) -> Result<Self, Self::Err> {
            serde_json::from_str(content)
                .map_err(|e| OapiError::DeserializationError(e.to_string()))
        }
    }
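
    // Hedged sketch of how `from_str` is typically fed: OpenAI-compatible
    // streaming endpoints deliver chunks as server-sent events, one JSON
    // payload per `data:` line, terminated by the sentinel `data: [DONE]`.
    // The exact framing is an assumption about the transport layer, not
    // something this module defines.
    #[allow(dead_code)]
    fn parse_sse_line(line: &str) -> Option<Result<ChatCompletionChunk, OapiError>> {
        let payload = line.strip_prefix("data:")?.trim();
        if payload.is_empty() || payload == "[DONE]" {
            return None;
        }
        Some(payload.parse())
    }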

    #[cfg(test)]
    mod test {
        use super::*;

        #[test]
        fn streaming_example_deepseek() {
            let streams = vec![
                r#"{"id": "1f633d8bfc032625086f14113c411638", "choices": [{"index": 0, "delta": {"content": "", "role": "assistant"}, "finish_reason": null, "logprobs": null}], "created": 1718345013, "model": "deepseek-chat", "system_fingerprint": "fp_a49d71b8a1", "object": "chat.completion.chunk", "usage": null}"#,
                r#"{"choices": [{"delta": {"content": "Hello", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "!", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " How", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " can", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " I", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " assist", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " you", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": " today", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "?", "role": "assistant"}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1"}"#,
                r#"{"choices": [{"delta": {"content": "", "role": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1718345013, "id": "1f633d8bfc032625086f14113c411638", "model": "deepseek-chat", "object": "chat.completion.chunk", "system_fingerprint": "fp_a49d71b8a1", "usage": {"completion_tokens": 9, "prompt_tokens": 17, "total_tokens": 26}}"#,
            ];

            for stream in streams {
                let parsed = ChatCompletionChunk::from_str(stream);
                match parsed {
                    Ok(completion) => {
                        println!("Deserialized: {:#?}", completion);
                    }
                    Err(e) => {
                        panic!("Failed to deserialize {}: {}", stream, e);
                    }
                }
            }
        }

        #[test]
        fn streaming_example_qwen() {
            let streams = vec![
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":"assistant","tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"我是","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"来自","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"阿里","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"云的超大规模","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"语言模型,我","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"叫通义千","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"问。","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[{"delta":{"content":"","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":null}"#,
                r#"{"id":"chatcmpl-e30f5ae7-3063-93c4-90fe-beb5f900bd57","choices":[],"created":1735113344,"model":"qwen-plus","object":"chat.completion.chunk","service_tier":null,"system_fingerprint":null,"usage":{"completion_tokens":17,"prompt_tokens":22,"total_tokens":39,"completion_tokens_details":null,"prompt_tokens_details":{"audio_tokens":null,"cached_tokens":0}}}"#,
            ];

            for stream in streams {
                let parsed = ChatCompletionChunk::from_str(stream);
                match parsed {
                    Ok(completion) => {
                        println!("Deserialized: {:#?}", completion);
                    }
                    Err(e) => {
                        panic!("Failed to deserialize {}: {}", stream, e);
                    }
                }
            }
        }
    }
}

pub mod no_streaming {
    use std::str::FromStr;

    use serde::Deserialize;

    use crate::{chat::ServiceTier, errors::OapiError};

    #[derive(Debug, Deserialize)]
    pub struct ChatCompletion {
        /// A unique identifier for the chat completion.
        pub id: String,
        /// A list of chat completion choices. Can be more than one
        /// if `n` is greater than 1.
        pub choices: Vec<Choice>,
        /// The Unix timestamp (in seconds) of when the chat completion was created.
        pub created: u64,
        /// The model used for the chat completion.
        pub model: String,
        /// Specifies the processing type used for serving the request.
        ///
        /// - If set to 'auto', then the request will be processed with the service tier
        ///   configured in the Project settings. Unless otherwise configured, the Project
        ///   will use 'default'.
        /// - If set to 'default', then the request will be processed with the standard
        ///   pricing and performance for the selected model.
        /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
        ///   '[priority](https://openai.com/api-priority-processing/)', then the request
        ///   will be processed with the corresponding service tier.
        /// - When not set, the default behavior is 'auto'.
        ///
        /// When the `service_tier` parameter is set, the response body will include the
        /// `service_tier` value based on the processing mode actually used to serve the
        /// request. This response value may be different from the value set in the
        /// parameter.
        pub service_tier: Option<ServiceTier>,
        /// The system fingerprint used for the chat completion.
        /// Can be used in conjunction with the `seed` request parameter to understand when
        /// backend changes have been made that might impact determinism.
        pub system_fingerprint: Option<String>,
        /// The object type, which is always `chat.completion`.
        pub object: ChatCompletionObject,
        /// Usage statistics for the completion request.
        pub usage: Option<CompletionUsage>,
    }

    /// The object type, which is always `chat.completion`.
    #[derive(Debug, Deserialize)]
    pub enum ChatCompletionObject {
        /// The object type is always `chat.completion`.
        #[serde(rename = "chat.completion")]
        ChatCompletion,
    }

    #[derive(Debug, Deserialize)]
    pub struct Choice {
        /// The reason the model stopped generating tokens.
        ///
        /// This will be `stop` if the model hit a natural stop point or a provided stop
        /// sequence, `length` if the maximum number of tokens specified in the request was
        /// reached, `content_filter` if content was omitted due to a flag from our content
        /// filters, `tool_calls` if the model called a tool, or `function_call`
        /// (deprecated) if the model called a function.
        pub finish_reason: FinishReason,
        /// The index of the choice in the list of choices.
        pub index: usize,
        /// Log probability information for the choice.
        pub logprobs: Option<ChoiceLogprobs>,
        /// A chat completion message generated by the model.
        pub message: ChatCompletionMessage,
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(rename_all = "snake_case")]
    pub enum FinishReason {
        Length,
        Stop,
        ToolCalls,
        FunctionCall,
        ContentFilter,
        /// Documented only in DeepSeek's API manual: generation was interrupted
        /// because system resources were insufficient.
        InsufficientSystemResource,
    }

    /// Fields that are not supported yet:
    /// - _audio_: If the audio output modality is requested, this object contains
    ///   data about the audio response from the model.
    ///   [Learn more from OpenAI](https://platform.openai.com/docs/guides/audio).
    #[derive(Debug, Deserialize)]
    pub struct ChatCompletionMessage {
        /// The role of the author of this message. This is always
        /// `ResponseRole::Assistant`.
        pub role: ResponseRole,
        /// The contents of the message.
        pub content: Option<String>,
        /// The reasoning contents of the message. For the deepseek-reasoner
        /// model only.
        pub reasoning_content: Option<String>,
        /// The tool calls generated by the model, such as function calls.
        pub tool_calls: Option<Vec<ChatCompletionMessageToolCall>>,
    }

    #[derive(Debug, Deserialize)]
    #[serde(tag = "type", rename_all = "snake_case")]
    pub enum ChatCompletionMessageToolCall {
        /// A function tool call. The `"type": "function"` tag is consumed by
        /// serde's internal tagging, so it does not appear as a field here.
        Function {
            /// The ID of the tool call.
            id: String,
            /// The function that the model called.
            function: MessageToolCallFunction,
        },
        /// A custom tool call. The `"type": "custom"` tag is consumed by
        /// serde's internal tagging, so it does not appear as a field here.
        Custom {
            /// The ID of the tool call.
            id: String,
            /// The custom tool that the model called.
            custom: MessageToolCallCustom,
        },
    }
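
    // Hedged sketch, not part of the original API: one way a caller might
    // render the parsed tool calls. For `Function` calls, `arguments` is
    // model-generated JSON and should be validated before use, as the field
    // docs below note.
    #[allow(dead_code)]
    fn describe_tool_call(call: &ChatCompletionMessageToolCall) -> String {
        match call {
            ChatCompletionMessageToolCall::Function { id, function } => {
                format!("function call {id}: {}({})", function.name, function.arguments)
            }
            ChatCompletionMessageToolCall::Custom { id, custom } => {
                format!("custom tool call {id}: {}({})", custom.name, custom.input)
            }
        }
    }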

    #[derive(Debug, Deserialize)]
    pub struct MessageToolCallCustom {
        /// The input for the custom tool call generated by the model.
        pub input: String,
        /// The name of the custom tool to call.
        pub name: String,
    }

    #[derive(Debug, Deserialize)]
    pub struct MessageToolCallFunction {
        /// The arguments to call the function with, as generated by the model in JSON
        /// format. Note that the model does not always generate valid JSON, and may
        /// hallucinate parameters not defined by your function schema. Validate the
        /// arguments in your code before calling your function.
        pub arguments: String,
        /// The name of the function to call.
        pub name: String,
    }

    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "snake_case")]
    pub enum ResponseRole {
        /// The role of the response message is always assistant.
        Assistant,
    }

    #[derive(Debug, Deserialize)]
    pub struct ChoiceLogprobs {
        /// A list of message content tokens with log probability information.
        pub content: Option<Vec<TokenLogProb>>,
        /// Only found in DeepSeek's manual.
        pub reasoning_content: Option<Vec<TokenLogProb>>,
        /// A list of message refusal tokens with log probability information.
        pub refusal: Option<Vec<TokenLogProb>>,
    }

    #[derive(Debug, Deserialize)]
    pub struct TokenLogProb {
        /// The token.
        pub token: String,
        /// The log probability of this token, if it is within the top 20 most likely
        /// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
        /// unlikely.
        pub logprob: f32,
        /// A list of integers representing the UTF-8 bytes representation of the token.
        ///
        /// Useful in instances where characters are represented by multiple tokens and
        /// their byte representations must be combined to generate the correct text
        /// representation. Can be `null` if there is no bytes representation for the token.
        pub bytes: Option<Vec<u8>>,
        /// List of the most likely tokens and their log probability, at this token
        /// position. In rare cases, there may be fewer than the number of requested
        /// `top_logprobs` returned.
        pub top_logprobs: Vec<TopLogprob>,
    }
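
    // Hedged sketch: a log probability converts back to a probability via
    // exp(). For example, a `logprob` of -0.105 is roughly a 90% chance for
    // that token, and the `-9999.0` sentinel described above maps to ~0.
    #[allow(dead_code)]
    fn token_probability(entry: &TokenLogProb) -> f32 {
        entry.logprob.exp()
    }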

    #[derive(Debug, Deserialize)]
    pub struct TopLogprob {
        /// The token.
        pub token: String,
        /// The log probability of this token, if it is within the top 20 most likely
        /// tokens. Otherwise, the value `-9999.0` is used to signify that the token is
        /// very unlikely.
        pub logprob: f32,
        /// A list of integers representing the UTF-8 bytes representation of the token.
        ///
        /// Useful in instances where characters are represented by multiple tokens and
        /// their byte representations must be combined to generate the correct text
        /// representation. Can be `null` if there is no bytes representation for the token.
        pub bytes: Option<Vec<u8>>,
    }

    #[derive(Debug, Deserialize)]
    pub struct CompletionUsage {
        /// Number of tokens in the generated completion.
        pub completion_tokens: usize,
        /// Number of tokens in the prompt.
        pub prompt_tokens: usize,

        // These two fields seem to be DeepSeek specific.
        /// Number of tokens in the prompt that hit the context cache.
        pub prompt_cache_hit_tokens: Option<usize>,
        /// Number of tokens in the prompt that miss the context cache.
        pub prompt_cache_miss_tokens: Option<usize>,

        /// Total number of tokens used in the request (prompt + completion).
        pub total_tokens: usize,
        /// Breakdown of tokens used in a completion.
        pub completion_tokens_details: Option<CompletionTokensDetails>,
        /// Breakdown of tokens used in the prompt.
        pub prompt_tokens_details: Option<PromptTokensDetails>,
    }

    #[derive(Debug, Deserialize)]
    pub struct CompletionTokensDetails {
        /// When using Predicted Outputs, the number of tokens in the prediction that
        /// appeared in the completion.
        pub accepted_prediction_tokens: Option<usize>,
        /// Audio input tokens generated by the model.
        pub audio_tokens: Option<usize>,
        /// Tokens generated by the model for reasoning.
        pub reasoning_tokens: Option<usize>,
        /// When using Predicted Outputs, the number of tokens in the prediction that did
        /// not appear in the completion. However, like reasoning tokens, these tokens are
        /// still counted in the total completion tokens for purposes of billing, output,
        /// and context window limits.
        pub rejected_prediction_tokens: Option<usize>,
    }

    #[derive(Debug, Deserialize)]
    pub struct PromptTokensDetails {
        /// Audio input tokens present in the prompt.
        pub audio_tokens: Option<usize>,
        /// Cached tokens present in the prompt.
        pub cached_tokens: Option<usize>,
    }

    impl FromStr for ChatCompletion {
        type Err = crate::errors::OapiError;

        fn from_str(content: &str) -> Result<Self, Self::Err> {
            serde_json::from_str(content)
                .map_err(|e| OapiError::DeserializationError(e.to_string()))
        }
    }
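
    // Hedged sketch showing end-to-end use of this module: parse a response
    // body and pull out the first choice's text. Real callers should decide
    // how to handle multiple choices (`n > 1`) and a `None` content.
    #[allow(dead_code)]
    fn first_choice_text(body: &str) -> Result<Option<String>, OapiError> {
        let completion = ChatCompletion::from_str(body)?;
        Ok(completion
            .choices
            .into_iter()
            .next()
            .and_then(|choice| choice.message.content))
    }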

    #[cfg(test)]
    mod test {
        use super::*;

        #[test]
        fn no_streaming_example_deepseek() {
            let json = r#"{
              "id": "30f6413a-a827-4cf3-9898-f13a8634b798",
              "object": "chat.completion",
              "created": 1757944111,
              "model": "deepseek-chat",
              "choices": [
                {
                  "index": 0,
                  "message": {
                    "role": "assistant",
                    "content": "Hello! How can I help you today? 😊"
                  },
                  "logprobs": null,
                  "finish_reason": "stop"
                }
              ],
              "usage": {
                "prompt_tokens": 10,
                "completion_tokens": 11,
                "total_tokens": 21,
                "prompt_tokens_details": {
                  "cached_tokens": 0
                },
                "prompt_cache_hit_tokens": 0,
                "prompt_cache_miss_tokens": 10
              },
              "system_fingerprint": "fp_08f168e49b_prod0820_fp8_kvcache"
            }"#;

            let parsed = ChatCompletion::from_str(json);
            match parsed {
                Ok(_) => {}
                Err(e) => {
                    panic!("Failed to deserialize: {}", e);
                }
            }
        }

        #[test]
        fn no_streaming_example_qwen() {
            let json = r#"{
                "choices": [
                    {
                        "message": {
                            "role": "assistant",
                            "content": "我是阿里云开发的一款超大规模语言模型,我叫通义千问。"
                        },
                        "finish_reason": "stop",
                        "index": 0,
                        "logprobs": null
                    }
                ],
                "object": "chat.completion",
                "usage": {
                    "prompt_tokens": 3019,
                    "completion_tokens": 104,
                    "total_tokens": 3123,
                    "prompt_tokens_details": {
                        "cached_tokens": 2048
                    }
                },
                "created": 1735120033,
                "system_fingerprint": null,
                "model": "qwen-plus",
                "id": "chatcmpl-6ada9ed2-7f33-9de2-8bb0-78bd4035025a"
            }"#;

            let parsed = ChatCompletion::from_str(json);
            match parsed {
                Ok(_) => {}
                Err(e) => {
                    panic!("Failed to deserialize: {}", e);
                }
            }
        }
    }
}