openai_interface/chat/
mod.rs

//! # Chat Completions API Module
//!
//! This module provides components shared by many submodules.
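//!
//! # Example
//!
//! A minimal sketch of parsing a response body via `FromStr`. The crate path
//! below is assumed, so the snippet is not compiled as a doctest:
//!
//! ```ignore
//! use std::str::FromStr;
//! use openai_interface::chat::ChatCompletion; // crate path assumed
//!
//! // `body` holds the raw JSON returned by the Chat Completions endpoint.
//! let completion = ChatCompletion::from_str(body)?;
//! println!("model: {}", completion.model);
//! ```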

use std::str::FromStr;

use serde::{Deserialize, Serialize};

use crate::errors::OapiError;

pub mod create;
pub mod delete;
pub mod retrieve;
pub mod update;

/// The service tier used for processing the request.
///
/// This enum represents the different service tiers that can be specified when
/// making a request to the API. Each tier corresponds to different performance
/// characteristics and pricing models.
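///
/// With `#[serde(rename_all = "lowercase")]`, the variants map to the lowercase
/// strings used on the wire: `"auto"`, `"default"`, `"flex"`, `"scale"`, and
/// `"priority"`. A small sketch (crate path assumed, so the snippet is not
/// compiled as a doctest):
///
/// ```ignore
/// use openai_interface::chat::ServiceTier;
///
/// assert_eq!(serde_json::to_string(&ServiceTier::Flex).unwrap(), r#""flex""#);
/// let tier: ServiceTier = serde_json::from_str(r#""priority""#).unwrap();
/// assert!(matches!(tier, ServiceTier::Priority));
/// ```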
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ServiceTier {
    /// Automatically select the service tier based on project settings.
    Auto,
    /// Use the default service tier with standard pricing and performance.
    Default,
    /// Use the flex service tier for flexible processing requirements.
    Flex,
    /// Use the scale service tier for scalable processing needs.
    Scale,
    /// Use the priority service tier for high-priority requests.
    Priority,
}

#[derive(Debug, Deserialize)]
pub struct ChatCompletion {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// A list of chat completion choices. Can be more than one
    /// if `n` is greater than 1.
    pub choices: Vec<Choice>,
    /// The Unix timestamp (in seconds) of when the chat completion was created.
    pub created: u64,
    /// The model used for the chat completion.
    pub model: String,
    /// Specifies the processing type used for serving the request.
    ///
    /// - If set to 'auto', then the request will be processed with the service tier
    ///   configured in the Project settings. Unless otherwise configured, the Project
    ///   will use 'default'.
    /// - If set to 'default', then the request will be processed with the standard
    ///   pricing and performance for the selected model.
    /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
    ///   '[priority](https://openai.com/api-priority-processing/)', then the request
    ///   will be processed with the corresponding service tier.
    /// - When not set, the default behavior is 'auto'.
    ///
    /// When the `service_tier` parameter is set, the response body will include the
    /// `service_tier` value based on the processing mode actually used to serve the
    /// request. This response value may be different from the value set in the
    /// parameter.
    pub service_tier: Option<ServiceTier>,
    /// The system fingerprint used for the chat completion.
    /// Can be used in conjunction with the `seed` request parameter to understand when
    /// backend changes have been made that might impact determinism.
    pub system_fingerprint: Option<String>,
    /// The object type, which is always `chat.completion`.
    pub object: ChatCompletionObject,
    /// Usage statistics for the completion request.
    pub usage: Option<CompletionUsage>,
}

/// The object type, which is always `chat.completion`.
#[derive(Debug, Deserialize)]
pub enum ChatCompletionObject {
    /// The object type is always `chat.completion`.
    #[serde(rename = "chat.completion")]
    ChatCompletion,
}

#[derive(Debug, Deserialize)]
pub struct Choice {
    /// The reason the model stopped generating tokens.
    ///
    /// This will be `stop` if the model hit a natural stop point or a provided stop
    /// sequence, `length` if the maximum number of tokens specified in the request was
    /// reached, `content_filter` if content was omitted due to a flag from our content
    /// filters, `tool_calls` if the model called a tool, or `function_call`
    /// (deprecated) if the model called a function.
    pub finish_reason: FinishReason,
    /// The index of the choice in the list of choices.
    pub index: usize,
    /// Log probability information for the choice.
    pub logprobs: Option<ChoiceLogprobs>,
    /// A chat completion message generated by the model.
    pub message: ChatCompletionMessage,
}

#[derive(Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    Length,
    Stop,
    ToolCalls,
    FunctionCall,
    ContentFilter,
    /// This variant is only documented in DeepSeek's API manual.
    InsufficientSystemResource,
}

/// A chat completion message generated by the model.
///
/// Fields that are not supported yet:
/// - _audio_: If the audio output modality is requested, this object contains
///   data about the audio response from the model.
///   [Learn more from OpenAI](https://platform.openai.com/docs/guides/audio).
#[derive(Debug, Deserialize)]
pub struct ChatCompletionMessage {
    /// The role of the author of this message. This is always
    /// `ResponseRole::Assistant`.
    pub role: ResponseRole,
    /// The contents of the message.
    pub content: Option<String>,
    /// The reasoning content produced by reasoning models.
    /// Only documented in DeepSeek's API manual.
    pub reasoning_content: Option<String>,
    /// The tool calls generated by the model, such as function calls.
    pub tool_calls: Option<Vec<ChatCompletionMessageToolCall>>,
}

/// A tool call generated by the model.
///
/// Deserialization is internally tagged on the `type` field, so the
/// `"type": "function"` / `"type": "custom"` value in the JSON selects the
/// variant automatically. An illustrative deserialization test lives in the
/// test module at the bottom of this file.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ChatCompletionMessageToolCall {
    /// A call to a function tool, selected when `type` is `"function"`.
    Function {
        /// The ID of the tool call.
        id: String,
        /// The function that the model called.
        function: MessageToolCallFunction,
    },
    /// A call to a custom tool, selected when `type` is `"custom"`.
    Custom {
        /// The ID of the tool call.
        id: String,
        /// The custom tool that the model called.
        custom: MessageToolCallCustom,
    },
}

#[derive(Debug, Deserialize)]
pub struct MessageToolCallCustom {
    /// The input for the custom tool call generated by the model.
    pub input: String,
    /// The name of the custom tool to call.
    pub name: String,
}

#[derive(Debug, Deserialize)]
pub struct MessageToolCallFunction {
    /// The arguments to call the function with, as generated by the model in JSON
    /// format. Note that the model does not always generate valid JSON, and may
    /// hallucinate parameters not defined by your function schema. Validate the
    /// arguments in your code before calling your function.
    pub arguments: String,
    /// The name of the function to call.
    pub name: String,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseRole {
    /// The role of the response message is always assistant.
    Assistant,
}

/// Log probability information for a choice. See the illustrative logprobs
/// test in the test module at the bottom of this file.
#[derive(Debug, Deserialize)]
pub struct ChoiceLogprobs {
    /// A list of message content tokens with log probability information.
    pub content: Option<Vec<TokenLogProb>>,
    /// Log probability information for the reasoning content.
    /// Only documented in DeepSeek's API manual.
    pub reasoning_content: Option<Vec<TokenLogProb>>,
    /// A list of message refusal tokens with log probability information.
    pub refusal: Option<Vec<TokenLogProb>>,
}

#[derive(Debug, Deserialize)]
pub struct TokenLogProb {
    /// The token.
    pub token: String,
    /// The log probability of this token, if it is within the top 20 most likely
    /// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
    /// unlikely.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token.
    ///
    /// Useful in instances where characters are represented by multiple tokens and
    /// their byte representations must be combined to generate the correct text
    /// representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
    /// List of the most likely tokens and their log probability, at this token
    /// position. In rare cases, there may be fewer than the number of requested
    /// `top_logprobs` returned.
    pub top_logprobs: Vec<TopLogprob>,
}

#[derive(Debug, Deserialize)]
pub struct TopLogprob {
    /// The token.
    pub token: String,
    /// The log probability of this token, if it is within the top 20 most likely
    /// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
    /// unlikely.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token.
    ///
    /// Useful in instances where characters are represented by multiple tokens and
    /// their byte representations must be combined to generate the correct text
    /// representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
}

#[derive(Debug, Deserialize)]
pub struct CompletionUsage {
    /// Number of tokens in the generated completion.
    pub completion_tokens: usize,
    /// Number of tokens in the prompt.
    pub prompt_tokens: usize,

    // These two fields appear to be DeepSeek-specific.
    /// Number of tokens in the prompt that hit the context cache.
    pub prompt_cache_hit_tokens: Option<usize>,
    /// Number of tokens in the prompt that missed the context cache.
    pub prompt_cache_miss_tokens: Option<usize>,

    /// Total number of tokens used in the request (prompt + completion).
    pub total_tokens: usize,
    /// Breakdown of tokens used in a completion.
    pub completion_tokens_details: Option<CompletionTokensDetails>,
    /// Breakdown of tokens used in the prompt.
    pub prompt_tokens_details: Option<PromptTokensDetails>,
}

#[derive(Debug, Deserialize)]
pub struct CompletionTokensDetails {
    /// When using Predicted Outputs, the number of tokens in the prediction that
    /// appeared in the completion.
    pub accepted_prediction_tokens: Option<usize>,
    /// Audio input tokens generated by the model.
    pub audio_tokens: Option<usize>,
    /// Tokens generated by the model for reasoning.
    pub reasoning_tokens: Option<usize>,
    /// When using Predicted Outputs, the number of tokens in the prediction that did
    /// not appear in the completion. However, like reasoning tokens, these tokens are
    /// still counted in the total completion tokens for purposes of billing, output,
    /// and context window limits.
    pub rejected_prediction_tokens: Option<usize>,
}

#[derive(Debug, Deserialize)]
pub struct PromptTokensDetails {
    /// Audio input tokens present in the prompt.
    pub audio_tokens: Option<usize>,
    /// Cached tokens present in the prompt.
    pub cached_tokens: Option<usize>,
}

impl FromStr for ChatCompletion {
    type Err = crate::errors::OapiError;

    fn from_str(content: &str) -> Result<Self, Self::Err> {
        serde_json::from_str(content)
            .map_err(|e| OapiError::DeserializationError(e.to_string()))
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn no_streaming_example_deepseek() {
        let json = r#"{
          "id": "30f6413a-a827-4cf3-9898-f13a8634b798",
          "object": "chat.completion",
          "created": 1757944111,
          "model": "deepseek-chat",
          "choices": [
            {
              "index": 0,
              "message": {
                "role": "assistant",
                "content": "Hello! How can I help you today? 😊"
              },
              "logprobs": null,
              "finish_reason": "stop"
            }
          ],
          "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 11,
            "total_tokens": 21,
            "prompt_tokens_details": {
              "cached_tokens": 0
            },
            "prompt_cache_hit_tokens": 0,
            "prompt_cache_miss_tokens": 10
          },
          "system_fingerprint": "fp_08f168e49b_prod0820_fp8_kvcache"
        }"#;

        let parsed = ChatCompletion::from_str(json);
        match parsed {
            Ok(_) => {}
            Err(e) => {
                panic!("Failed to deserialize: {}", e);
            }
        }
    }
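
    // Illustrative only: a hand-written logprobs fragment shaped after the
    // `ChoiceLogprobs` / `TokenLogProb` structs above, not a captured API response.
    #[test]
    fn logprobs_snippet_deserializes() {
        let json = r#"{
            "content": [
                {
                    "token": "Hello",
                    "logprob": -0.031,
                    "bytes": [72, 101, 108, 108, 111],
                    "top_logprobs": [
                        { "token": "Hello", "logprob": -0.031, "bytes": [72, 101, 108, 108, 111] },
                        { "token": "Hi", "logprob": -3.9, "bytes": [72, 105] }
                    ]
                }
            ],
            "refusal": null
        }"#;

        let parsed: ChoiceLogprobs =
            serde_json::from_str(json).expect("logprobs fragment should deserialize");
        let content = parsed.content.expect("content tokens should be present");
        assert_eq!(content.len(), 1);
        assert_eq!(content[0].token, "Hello");
        assert_eq!(content[0].top_logprobs.len(), 2);
    }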

    #[test]
    fn no_streaming_example_qwen() {
        let json = r#"{
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "我是阿里云开发的一款超大规模语言模型,我叫通义千问。"
                    },
                    "finish_reason": "stop",
                    "index": 0,
                    "logprobs": null
                }
            ],
            "object": "chat.completion",
            "usage": {
                "prompt_tokens": 3019,
                "completion_tokens": 104,
                "total_tokens": 3123,
                "prompt_tokens_details": {
                    "cached_tokens": 2048
                }
            },
            "created": 1735120033,
            "system_fingerprint": null,
            "model": "qwen-plus",
            "id": "chatcmpl-6ada9ed2-7f33-9de2-8bb0-78bd4035025a"
        }"#;

        let parsed = ChatCompletion::from_str(json);
        match parsed {
            Ok(_) => {}
            Err(e) => {
                panic!("Failed to deserialize: {}", e);
            }
        }
    }
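
    // Illustrative only: a hand-written tool call in the shape the Chat Completions
    // API uses for function calls, exercising the internally tagged enum above.
    #[test]
    fn function_tool_call_deserializes() {
        let json = r#"{
            "id": "call_abc123",
            "type": "function",
            "function": {
                "name": "get_weather",
                "arguments": "{\"location\": \"Paris\"}"
            }
        }"#;

        let parsed: ChatCompletionMessageToolCall =
            serde_json::from_str(json).expect("tool call should deserialize");
        match parsed {
            ChatCompletionMessageToolCall::Function { id, function } => {
                assert_eq!(id, "call_abc123");
                assert_eq!(function.name, "get_weather");
            }
            _ => panic!("expected a function tool call"),
        }
    }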
}