Skip to main content

deepseek_sdk/completion/
chat.rs

1//! Beta chat completion request types.
2//!
3//! The beta endpoint enables prefix/continuation behavior for assistant messages.
4pub mod request {
5    use crate::DeepSeekError;
6    use crate::chat::request::{
7        ReasoningEffort, ResponseFormat, Stop, StreamOptions, Thinking, ToolChoice,
8        ToolType, is_none_or_empty_stop,
9    };
10    use crate::chat::response::ToolCall;
11    use crate::chat::{Chat, ChatStream, ChatStreamBlocking, ChatStreamItem, is_none_or_empty_vec};
12    use crate::{DeepSeekClient, DeepSeekRequest, api_post, api_request_stream};
13    use derive_builder::Builder;
14    use futures_util::StreamExt;
15    use reqwest::Method;
16    use reqwest_eventsource::Event;
17    use serde::{Deserialize, Serialize};
18    use std::sync::mpsc as std_mpsc;
19    use tokio::sync::mpsc;
20
21    fn is_false(value: &bool) -> bool {
22        !*value
23    }
24
25    /// Beta chat request payload (beta base URL required).
26    #[derive(Clone, Debug, PartialEq, Serialize, Builder)]
27    #[builder(
28        pattern = "owned",
29        setter(into, strip_option),
30        build_fn(validate = "Self::validate"),
31        name = "BetaChatRequestBuilder"
32    )]
33    pub struct BetaChatRequest {
34        #[serde(skip_serializing)]
35        pub client: DeepSeekClient,
36
37        /// A list of messages comprising the conversation so far.
38        #[builder(setter(each(name = "message", into)))]
39        pub messages: Vec<BetaChatMessage>,
40
41        /// Possible values: [`deepseek-v4-flash`, `deepseek-v4-pro`]
42        ///
43        /// ID of the model to use.
44        pub model: String,
45
46        /// Controls the switch between thinking and non-thinking mode.
47        #[builder(default)]
48        #[serde(skip_serializing_if = "Option::is_none")]
49        pub thinking: Option<Thinking>,
50
51        /// Possible values: [`high`, `max`]
52        ///
53        /// Controls the reasoning effort of the model.
54        /// The default effort is `high` for regular requests;
55        /// for some complex agent requests (such as Claude Code, OpenCode),
56        /// effort is automatically set to `max`.
57        /// For compatibility, `low` and `medium` are mapped to `high`,
58        /// and `xhigh` is mapped to `max`.
59        #[builder(default)]
60        #[serde(skip_serializing_if = "Option::is_none")]
61        pub reasoning_effort: Option<ReasoningEffort>,
62
63        /// The maximum number of tokens that can be generated in the chat completion.
64        ///
65        /// The total length of input tokens and generated tokens is limited by the model's context length.
66        ///
67        /// For the value range and default value, please refer to the [documentation](https://api-docs.deepseek.com/quick_start/pricing).
68        #[builder(default)]
69        #[serde(skip_serializing_if = "Option::is_none")]
70        pub max_tokens: Option<u32>,
71
72        /// An object specifying the format that the model must output.
73        /// Setting to { "type": "json_object" } enables JSON Output,
74        /// which guarantees the message the model generates is valid JSON.
75        ///
76        /// **Important**: When using JSON Output, you must also instruct the model to produce JSON yourself via a system or user message.
77        /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length.
78        #[builder(default)]
79        #[serde(skip_serializing_if = "Option::is_none")]
80        pub response_format: Option<ResponseFormat>,
81
82        /// Up to 16 sequences where the API will stop generating further tokens.
83        #[builder(default)]
84        #[serde(skip_serializing_if = "is_none_or_empty_stop")]
85        pub stop: Option<Stop>,
86
87        /// If set, partial message deltas will be sent.
88        /// Tokens will be sent as data-only server-sent events (SSE) as they become available,
89        /// with the stream terminated by a `data: [DONE]`` message.
90        #[builder(default)]
91        #[serde(skip_serializing_if = "Option::is_none")]
92        pub stream: Option<bool>,
93
94        /// Options for streaming response. Only set this when you set `stream: true`.
95        #[builder(default)]
96        #[serde(skip_serializing_if = "Option::is_none")]
97        pub stream_options: Option<StreamOptions>,
98
99        /// Possible values: `<= 2`
100        ///
101        /// Default value: `1`
102        ///
103        /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
104        /// We generally recommend altering this or `top_p` but not both.
105        #[builder(default)]
106        #[serde(skip_serializing_if = "Option::is_none")]
107        pub temperature: Option<f64>,
108
109        /// Possible values: `<= 1`
110        ///
111        /// Default value: `1`
112        ///
113        /// An alternative to sampling with temperature, called nucleus sampling,
114        /// where the model considers the results of the tokens with top_p probability mass.
115        /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
116        ///
117        /// We generally recommend altering this or `temperature` but not both.
118        #[builder(default)]
119        #[serde(skip_serializing_if = "Option::is_none")]
120        pub top_p: Option<f64>,
121
122        /// A list of tools the model may call. Currently, only functions are supported as a tool.
123        /// Use this to provide a list of functions the model may generate JSON inputs for.
124        /// A max of 128 functions are supported.
125        #[builder(default, setter(each(name = "tool", into)))]
126        #[serde(skip_serializing_if = "Vec::is_empty")]
127        pub tools: Vec<Tool>,
128
129        /// Controls which (if any) tool is called by the model.
130        /// `none` means the model will not call any tool and instead generates a message.
131        /// `auto` means the model can pick between generating a message or calling one or more tools.
132        /// `required` means the model must call one or more tools.
133        /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
134        /// `none` is the default when no tools are present. `auto` is the default if tools are present.
135        #[builder(default)]
136        #[serde(skip_serializing_if = "Option::is_none")]
137        pub tool_choice: Option<ToolChoice>,
138
139        /// Whether to return log probabilities of the output tokens or not.
140        /// If true, returns the log probabilities of each output token returned in the `content` of `message`.
141        #[builder(default)]
142        #[serde(skip_serializing_if = "Option::is_none")]
143        pub logprobs: Option<bool>,
144
145        /// Possible values: `<= 20`
146        ///
147        /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
148        /// each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.
149        #[builder(default)]
150        #[serde(skip_serializing_if = "Option::is_none")]
151        pub top_logprobs: Option<u32>,
152
153        /// A custom `user_id`. Allowed character set is `[a-zA-Z0-9\-_]`, with a maximum length of 512.
154        /// Do not include user privacy information in the `user_id`.
155
156        /// `user_id` can be used to distinguish user identities on your side to help us with content safety review.
157        /// `user_id` can be used for KVCache isolation for privacy management.
158        /// `user_id` can be used for scheduling isolation of users on your business side.
159        /// For more details on the `user_id` parameter, please refer to [Rate Limit & Isolation](https://api-docs.deepseek.com/quick_start/rate_limit)
160        #[builder(default)]
161        #[serde(skip_serializing_if = "Option::is_none")]
162        pub user_id: Option<String>,
163    }
164    /// Beta chat message variants.
165    #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
166    #[serde(tag = "role", rename_all = "snake_case")]
167    pub enum BetaChatMessage {
168        System {
169            /// The contents of the system message.
170            content: String,
171            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
172            #[serde(skip_serializing_if = "Option::is_none")]
173            name: Option<String>,
174        },
175        User {
176            /// The contents of the user message.
177            content: String,
178            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
179            #[serde(skip_serializing_if = "Option::is_none")]
180            name: Option<String>,
181        },
182        Assistant {
183            /// The contents of the assistant message.
184            #[serde(skip_serializing_if = "Option::is_none")]
185            content: Option<String>,
186            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
187            #[serde(skip_serializing_if = "Option::is_none")]
188            name: Option<String>,
189            /// (Beta) Set this to `true` to force the model to start its answer by the content of the supplied prefix in this `assistant` message.
190            /// You must set `base_url="https://api.deepseek.com/beta"` to use this feature.
191            #[serde(default, skip_serializing_if = "is_false")]
192            prefix: bool,
193            /// (Beta) Used for the thinking mode in the [Chat Prefix Completion](https://api-docs.deepseek.com/guides/chat_prefix_completion)
194            /// feature as the input for the CoT in the last assistant message.
195            /// When using this feature, the `prefix` parameter must be set to `true`.
196            #[serde(skip_serializing_if = "Option::is_none")]
197            reasoning_content: Option<String>,
198            #[serde(skip_serializing_if = "is_none_or_empty_vec")]
199            tool_calls: Option<Vec<ToolCall>>,
200        },
201        Tool {
202            /// The contents of the tool message.
203            content: String,
204            /// Tool call that this message is responding to.
205            tool_call_id: String,
206        },
207    }
208    /// Tool definition for beta chat requests.
209    #[derive(Clone, Debug, PartialEq, Eq, Serialize)]
210    pub struct Tool {
211        #[serde(rename = "type")]
212        pub typ: ToolType,
213        pub function: BetaToolFunctionDefinition,
214    }
215
216    impl Tool {
217        pub fn new(
218            name: impl Into<String>,
219            description: impl Into<String>,
220            parameters: Option<serde_json::Value>,
221            strict: Option<bool>,
222        ) -> Self {
223            Tool {
224                typ: ToolType::Function,
225                function: BetaToolFunctionDefinition {
226                    name: name.into(),
227                    description: description.into(),
228                    parameters,
229                    strict,
230                },
231            }
232        }
233    }
234    /// Tool function definition for beta chat requests.
235    #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
236    pub struct BetaToolFunctionDefinition {
237        pub description: String,
238        pub name: String,
239        #[serde(skip_serializing_if = "Option::is_none")]
240        pub parameters: Option<serde_json::Value>,
241        /// (Beta) Default value: `false`
242        ///
243        /// If set to true, the API will use strict-mode for the tool calls to ensure the output always complies with the function's JSON schema.
244        /// This is a Beta feature, for more details please refer to [Tool Calls Guide](https://api-docs.deepseek.com/zh-cn/guides/tool_calls)
245        #[serde(skip_serializing_if = "Option::is_none")]
246        pub strict: Option<bool>,
247    }
248
249    impl BetaChatRequestBuilder {
250        fn validate(&self) -> Result<(), String> {
251            // derive_builder + strip_option makes Option<T> fields become Option<Option<T>> here;
252            // flatten() treats "unset" and "explicit None" uniformly for validation.
253            if let Some(temperature) = self.temperature.flatten()
254                && !(0.0..=2.0).contains(&temperature) {
255                    return Err("temperature must be between 0 and 2".to_string());
256                }
257
258            if let Some(top_p) = self.top_p.flatten()
259                && !(0.0..=1.0).contains(&top_p) {
260                    return Err("top_p must be between 0 and 1".to_string());
261                }
262
263            if let Some(top_logprobs) = self.top_logprobs.flatten() {
264                if top_logprobs > 20 {
265                    return Err("top_logprobs must be <= 20".to_string());
266                }
267                if self.logprobs.flatten() != Some(true) {
268                    return Err("top_logprobs requires logprobs=true".to_string());
269                }
270            }
271
272            if let Some(stream) = self.stream.flatten()
273                && !stream && self.stream_options.is_some() {
274                    return Err("stream_options cannot be set when stream is false".to_string());
275                }
276
277            if let Some(messages) = self.messages.as_ref() {
278                messages.iter().try_for_each(|message| {
279                    if let BetaChatMessage::Assistant {
280                        prefix: false,
281                        reasoning_content: Some(_),
282                        ..
283                    } = message
284                    {
285                        return Err(
286                            "reasoning_content cannot be set when assistant message prefix is false".to_string(),
287                        );
288                    }
289                    Ok(())
290                })?;
291            }
292
293            if let Some(stop) = self.stop.as_ref().and_then(|s| s.as_ref())
294                && let Stop::Many(values) = stop
295                    && values.len() > 16 {
296                        return Err("a maximum of 16 stop sequences are allowed".to_string());
297                    }
298
299            if let Some(user_id) = self.user_id.as_ref().and_then(|u| u.as_ref()) {
300                if user_id.len() > 512 {
301                    return Err("user_id must be at most 512 characters".to_string());
302                }
303                if !user_id
304                    .chars()
305                    .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
306                {
307                    return Err(
308                        "user_id must only contain [a-zA-Z0-9\\-_]".to_string(),
309                    );
310                }
311            }
312
313            Ok(())
314        }
315    }
316
317    impl DeepSeekRequest for BetaChatRequest {
318        type Response = Chat;
319        type StreamItem = ChatStreamItem;
320        type BlockingStream = ChatStreamBlocking;
321
322        async fn send(self) -> Result<Chat, DeepSeekError> {
323            let client = self.client.clone();
324            api_post("/chat/completions", &self, client).await
325        }
326
327        async fn stream(self) -> Result<mpsc::Receiver<ChatStreamItem>, DeepSeekError> {
328            let mut request = self;
329            request.stream = Some(true);
330
331            let client = request.client.clone();
332            let mut event_source = api_request_stream(
333                Method::POST,
334                "/chat/completions",
335                |builder| builder.json(&request),
336                client,
337            )
338            .await?;
339
340            let (tx, rx) = mpsc::channel(32);
341
342            tokio::spawn(async move {
343                while let Some(event) = event_source.next().await {
344                    match event {
345                        Ok(Event::Open) => {}
346                        Ok(Event::Message(message)) => {
347                            if message.data == "[DONE]" {
348                                break;
349                            }
350                            match serde_json::from_str::<ChatStream>(&message.data) {
351                                Ok(chunk) => {
352                                    if tx.send(Ok(chunk)).await.is_err() {
353                                        break;
354                                    }
355                                }
356                                Err(err) => {
357                                    let _ = tx
358                                        .send(Err(DeepSeekError::decode(
359                                            err.to_string(),
360                                            message.data,
361                                        )))
362                                        .await;
363                                    break;
364                                }
365                            }
366                        }
367                        Err(err) => {
368                            let _ = tx
369                                .send(Err(DeepSeekError::decode(err.to_string(), String::new())))
370                                .await;
371                            break;
372                        }
373                    }
374                }
375            });
376
377            Ok(rx)
378        }
379
380        fn stream_blocking(self) -> Result<ChatStreamBlocking, DeepSeekError> {
381            let (tx, rx) = std_mpsc::channel();
382
383            std::thread::spawn(move || {
384                let runtime = match tokio::runtime::Builder::new_current_thread()
385                    .enable_all()
386                    .build()
387                {
388                    Ok(runtime) => runtime,
389                    Err(err) => {
390                        let _ = tx.send(Err(DeepSeekError::decode(err.to_string(), String::new())));
391                        return;
392                    }
393                };
394
395                runtime.block_on(async move {
396                    match self.stream().await {
397                        Ok(mut stream_rx) => {
398                            while let Some(item) = stream_rx.recv().await {
399                                if tx.send(item).is_err() {
400                                    break;
401                                }
402                            }
403                        }
404                        Err(err) => {
405                            let _ = tx.send(Err(err));
406                        }
407                    }
408                });
409            });
410
411            Ok(ChatStreamBlocking { rx })
412        }
413    }
414}
415
#[cfg(test)]
mod tests {
    use super::request::*;
    use crate::{DEFAULT_BETA_BASE_URL, DeepSeekClient, DeepSeekRequest, chat::request::Thinking};

    /// Client pointed at the beta base URL, keyed by the `DEEPSEEK_API`
    /// environment variable. Panics when the variable is not set.
    fn beta_client() -> DeepSeekClient {
        let api_key = std::env::var("DEEPSEEK_API").expect("DEEPSEEK_API is not set");
        DeepSeekClient::new(api_key, DEFAULT_BETA_BASE_URL.clone())
    }

    /// Builder preloaded with the client, model, a 32-token cap, and thinking disabled.
    fn base_builder() -> BetaChatRequestBuilder {
        let builder = BetaChatRequestBuilder::default().client(beta_client());
        builder
            .model("deepseek-v4-flash")
            .max_tokens(32_u32)
            .thinking(Thinking::disabled())
    }

    /// Sends a prefix-completion request (assistant message with `prefix: true`)
    /// to the live API and prints the response.
    #[tokio::test]
    async fn beta_chat() {
        let question = BetaChatMessage::User {
            content: "Please write quick sort code".to_string(),
            name: None,
        };
        let answer_prefix = BetaChatMessage::Assistant {
            content: Some("```python\n".to_string()),
            name: None,
            prefix: true,
            reasoning_content: None,
            tool_calls: None,
        };

        let req = base_builder()
            .message(question)
            .message(answer_prefix)
            .stop("```")
            .build()
            .unwrap();

        let response = req.send().await.unwrap();
        println!("{response:#?}");
    }
}