Skip to main content

deepseek_sdk/completion/
chat.rs

1//! Beta chat completion request types.
2//!
3//! The beta endpoint enables prefix/continuation behavior for assistant messages.
4pub mod request {
5    use crate::DeepSeekError;
6    use crate::chat::request::{
7        ReasoningEffort, ResponseFormat, Stop, StreamOptions, Thinking, ThinkingType, ToolChoice,
8        ToolType, is_none_or_empty_stop,
9    };
10    use crate::chat::response::ToolCall;
11    use crate::chat::{Chat, ChatStream, ChatStreamBlocking, ChatStreamItem, is_none_or_empty_vec};
12    use crate::{DeepSeekClient, DeepSeekRequest, api_post, api_request_stream};
13    use derive_builder::Builder;
14    use futures_util::StreamExt;
15    use reqwest::Method;
16    use reqwest_eventsource::Event;
17    use serde::{Deserialize, Serialize};
18    use std::sync::mpsc as std_mpsc;
19    use tokio::sync::mpsc;
20
21    fn is_false(value: &bool) -> bool {
22        !*value
23    }
24
    /// Beta chat request payload (beta base URL required).
    ///
    /// Instances are assembled through the generated [`BetaChatRequestBuilder`]
    /// (owned-pattern setters that accept `impl Into<_>` and strip the `Option`
    /// wrapper). `build()` runs `BetaChatRequestBuilder::validate` before the
    /// request is returned.
    #[derive(Clone, Debug, Serialize, Builder)]
    #[builder(
        pattern = "owned",
        setter(into, strip_option),
        build_fn(validate = "Self::validate"),
        name = "BetaChatRequestBuilder"
    )]
    pub struct BetaChatRequest {
        /// Client used to send this request. Never serialized into the JSON payload.
        #[serde(skip_serializing)]
        pub client: DeepSeekClient,

        /// A list of messages comprising the conversation so far.
        #[builder(setter(each(name = "message", into)))]
        pub messages: Vec<BetaChatMessage>,

        /// Possible values: [`deepseek-v4-flash`, `deepseek-v4-pro`]
        ///
        /// ID of the model to use.
        pub model: String,

        /// Controls the switch between thinking and non-thinking mode.
        ///
        /// Thinking switch object: `{"type": "enabled" | "disabled"}`.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub thinking: Option<Thinking>,

        /// Possible values: [`high`, `max`]
        ///
        /// Controls the reasoning effort of the model.
        /// The default effort is `high` for regular requests;
        /// for some complex agent requests (such as Claude Code, OpenCode),
        /// effort is automatically set to `max`.
        /// For compatibility, `low` and `medium` are mapped to `high`,
        /// and `xhigh` is mapped to `max`.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub reasoning_effort: Option<ReasoningEffort>,

        /// The maximum number of tokens that can be generated in the chat completion.
        ///
        /// The total length of input tokens and generated tokens is limited by the model's context length.
        ///
        /// For the value range and default value, please refer to the [documentation](https://api-docs.deepseek.com/quick_start/pricing).
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub max_tokens: Option<u32>,

        /// An object specifying the format that the model must output.
        /// Setting to { "type": "json_object" } enables JSON Output,
        /// which guarantees the message the model generates is valid JSON.
        ///
        /// **Important**: When using JSON Output, you must also instruct the model to produce JSON yourself via a system or user message.
        /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub response_format: Option<ResponseFormat>,

        /// Up to 16 sequences where the API will stop generating further tokens.
        #[builder(default)]
        #[serde(skip_serializing_if = "is_none_or_empty_stop")]
        pub stop: Option<Stop>,

        /// If set, partial message deltas will be sent.
        /// Tokens will be sent as data-only server-sent events (SSE) as they become available,
        /// with the stream terminated by a `data: [DONE]` message.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub stream: Option<bool>,

        /// Options for streaming response. Only set this when you set `stream: true`.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub stream_options: Option<StreamOptions>,

        /// Possible values: `<= 2`
        ///
        /// Default value: `1`
        ///
        /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
        /// We generally recommend altering this or `top_p` but not both.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub temperature: Option<f64>,

        /// Possible values: `<= 1`
        ///
        /// Default value: `1`
        ///
        /// An alternative to sampling with temperature, called nucleus sampling,
        /// where the model considers the results of the tokens with top_p probability mass.
        /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
        ///
        /// We generally recommend altering this or `temperature` but not both.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub top_p: Option<f64>,

        /// A list of tools the model may call. Currently, only functions are supported as a tool.
        /// Use this to provide a list of functions the model may generate JSON inputs for.
        /// A max of 128 functions are supported.
        #[builder(default, setter(each(name = "tool", into)))]
        #[serde(skip_serializing_if = "Vec::is_empty")]
        pub tools: Vec<Tool>,

        /// Controls which (if any) tool is called by the model.
        /// `none` means the model will not call any tool and instead generates a message.
        /// `auto` means the model can pick between generating a message or calling one or more tools.
        /// `required` means the model must call one or more tools.
        /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
        /// `none` is the default when no tools are present. `auto` is the default if tools are present.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub tool_choice: Option<ToolChoice>,

        /// Whether to return log probabilities of the output tokens or not.
        /// If true, returns the log probabilities of each output token returned in the `content` of `message`.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub logprobs: Option<bool>,

        /// Possible values: `<= 20`
        ///
        /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
        /// each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub top_logprobs: Option<u32>,

        /// A custom `user_id`. Allowed character set is `[a-zA-Z0-9\-_]`, with a maximum length of 512.
        /// Do not include user privacy information in the `user_id`.
        ///
        /// `user_id` can be used to distinguish user identities on your side to help us with content safety review.
        /// `user_id` can be used for KVCache isolation for privacy management.
        /// `user_id` can be used for scheduling isolation of users on your business side.
        /// For more details on the `user_id` parameter, please refer to [Rate Limit & Isolation](https://api-docs.deepseek.com/quick_start/rate_limit)
        #[builder(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub user_id: Option<String>,
    }
    /// Beta chat message variants.
    ///
    /// Serialized as an internally tagged object: the variant name, in
    /// snake_case, is written to the `role` key (`system`, `user`, `assistant`,
    /// `tool`) alongside the variant's own fields.
    #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
    #[serde(tag = "role", rename_all = "snake_case")]
    pub enum BetaChatMessage {
        /// A message with `role: "system"`.
        System {
            /// The contents of the system message.
            content: String,
            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
            #[serde(skip_serializing_if = "Option::is_none")]
            name: Option<String>,
        },
        /// A message with `role: "user"`.
        User {
            /// The contents of the user message.
            content: String,
            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
            #[serde(skip_serializing_if = "Option::is_none")]
            name: Option<String>,
        },
        /// A message with `role: "assistant"`.
        Assistant {
            /// The contents of the assistant message.
            #[serde(skip_serializing_if = "Option::is_none")]
            content: Option<String>,
            /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
            #[serde(skip_serializing_if = "Option::is_none")]
            name: Option<String>,
            /// (Beta) Set this to `true` to force the model to start its answer by the content of the supplied prefix in this `assistant` message.
            /// You must set `base_url="https://api.deepseek.com/beta"` to use this feature.
            ///
            /// Defaults to `false` when absent on deserialization, and is omitted
            /// from the serialized output when `false`.
            #[serde(default, skip_serializing_if = "is_false")]
            prefix: bool,
            /// (Beta) Used for the thinking mode in the [Chat Prefix Completion](https://api-docs.deepseek.com/guides/chat_prefix_completion)
            /// feature as the input for the CoT in the last assistant message.
            /// When using this feature, the `prefix` parameter must be set to `true`.
            #[serde(skip_serializing_if = "Option::is_none")]
            reasoning_content: Option<String>,
            /// Tool calls attached to this assistant message.
            /// Skipped during serialization when `is_none_or_empty_vec` returns `true`.
            #[serde(skip_serializing_if = "is_none_or_empty_vec")]
            tool_calls: Option<Vec<ToolCall>>,
        },
        /// A message with `role: "tool"`.
        Tool {
            /// The contents of the tool message.
            content: String,
            /// Tool call that this message is responding to.
            tool_call_id: String,
        },
    }
    /// Tool definition for beta chat requests.
    #[derive(Clone, Debug, PartialEq, Eq, Serialize)]
    pub struct Tool {
        /// Kind of tool; serialized under the JSON key `type`.
        #[serde(rename = "type")]
        pub typ: ToolType,
        /// Definition of the function this tool exposes.
        pub function: BetaToolFunctionDefinition,
    }
216
217    impl Tool {
218        pub fn new(
219            name: impl Into<String>,
220            description: impl Into<String>,
221            parameters: Option<serde_json::Value>,
222            strict: Option<bool>,
223        ) -> Self {
224            Tool {
225                typ: ToolType::Function,
226                function: BetaToolFunctionDefinition {
227                    name: name.into(),
228                    description: description.into(),
229                    parameters,
230                    strict,
231                },
232            }
233        }
234    }
235    /// Tool function definition for beta chat requests.
236    #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
237    pub struct BetaToolFunctionDefinition {
238        pub description: String,
239        pub name: String,
240        #[serde(skip_serializing_if = "Option::is_none")]
241        pub parameters: Option<serde_json::Value>,
242        /// (Beta) Default value: `false`
243        ///
244        /// If set to true, the API will use strict-mode for the tool calls to ensure the output always complies with the function's JSON schema.
245        /// This is a Beta feature, for more details please refer to [Tool Calls Guide](https://api-docs.deepseek.com/zh-cn/guides/tool_calls)
246        pub strict: Option<bool>,
247    }
248
249    impl BetaChatRequestBuilder {
250        fn validate(&self) -> Result<(), String> {
251            // derive_builder + strip_option makes Option<T> fields become Option<Option<T>> here;
252            // flatten() treats "unset" and "explicit None" uniformly for validation.
253            if let Some(temperature) = self.temperature.flatten() {
254                if !(0.0..=2.0).contains(&temperature) {
255                    return Err("temperature must be between 0 and 2".to_string());
256                }
257            }
258
259            if let Some(top_p) = self.top_p.flatten() {
260                if !(0.0..=1.0).contains(&top_p) {
261                    return Err("top_p must be between 0 and 1".to_string());
262                }
263            }
264
265            if let Some(top_logprobs) = self.top_logprobs.flatten() {
266                if top_logprobs > 20 {
267                    return Err("top_logprobs must be <= 20".to_string());
268                }
269                if self.logprobs.flatten() != Some(true) {
270                    return Err("top_logprobs requires logprobs=true".to_string());
271                }
272            }
273
274            if let Some(thinking) = self
275                .thinking
276                .as_ref()
277                .and_then(|thinking| thinking.as_ref())
278            {
279                if let Some(reasoning_effort) = self
280                    .reasoning_effort
281                    .as_ref()
282                    .and_then(|effort| effort.as_ref())
283                {
284                    if matches!(thinking.typ, ThinkingType::Disabled)
285                        && matches!(
286                            reasoning_effort,
287                            ReasoningEffort::High | ReasoningEffort::Max
288                        )
289                    {
290                        return Err(
291                            "thinking options type cannot be disabled when reasoning_effort is set"
292                                .to_string(),
293                        );
294                    }
295                }
296            }
297
298            if let Some(stream) = self.stream.flatten() {
299                if !stream && self.stream_options.is_some() {
300                    return Err("stream_options cannot be set when stream is false".to_string());
301                }
302            }
303
304            if let Some(messages) = self.messages.as_ref() {
305                messages.iter().try_for_each(|message| {
306                    if let BetaChatMessage::Assistant {
307                        prefix: false,
308                        reasoning_content: Some(_),
309                        ..
310                    } = message
311                    {
312                        return Err(
313                            "reasoning_content cannot be set when assistant message prefix is false".to_string(),
314                        );
315                    }
316                    Ok(())
317                })?;
318            }
319
320            if let Some(stop) = self.stop.as_ref().and_then(|s| s.as_ref()) {
321                if let Stop::Many(values) = stop {
322                    if values.len() > 16 {
323                        return Err("a maximum of 16 stop sequences are allowed".to_string());
324                    }
325                }
326            }
327            Ok(())
328        }
329    }
330
331    impl DeepSeekRequest for BetaChatRequest {
332        type Response = Chat;
333        type StreamItem = ChatStreamItem;
334        type BlockingStream = ChatStreamBlocking;
335
336        async fn send(self) -> Result<Chat, DeepSeekError> {
337            let client = self.client.clone();
338            api_post("/chat/completions", &self, client).await
339        }
340
341        async fn stream(self) -> Result<mpsc::Receiver<ChatStreamItem>, DeepSeekError> {
342            let mut request = self;
343            request.stream = Some(true);
344
345            let client = request.client.clone();
346            let mut event_source = api_request_stream(
347                Method::POST,
348                "/chat/completions",
349                |builder| builder.json(&request),
350                client,
351            )
352            .await?;
353
354            let (tx, rx) = mpsc::channel(32);
355
356            tokio::spawn(async move {
357                while let Some(event) = event_source.next().await {
358                    match event {
359                        Ok(Event::Open) => {}
360                        Ok(Event::Message(message)) => {
361                            if message.data == "[DONE]" {
362                                break;
363                            }
364                            match serde_json::from_str::<ChatStream>(&message.data) {
365                                Ok(chunk) => {
366                                    if tx.send(Ok(chunk)).await.is_err() {
367                                        break;
368                                    }
369                                }
370                                Err(err) => {
371                                    let _ = tx
372                                        .send(Err(DeepSeekError::decode(
373                                            err.to_string(),
374                                            message.data,
375                                        )))
376                                        .await;
377                                    break;
378                                }
379                            }
380                        }
381                        Err(err) => {
382                            let _ = tx
383                                .send(Err(DeepSeekError::decode(err.to_string(), String::new())))
384                                .await;
385                            break;
386                        }
387                    }
388                }
389            });
390
391            Ok(rx)
392        }
393
394        fn stream_blocking(self) -> Result<ChatStreamBlocking, DeepSeekError> {
395            let (tx, rx) = std_mpsc::channel();
396
397            std::thread::spawn(move || {
398                let runtime = match tokio::runtime::Builder::new_current_thread()
399                    .enable_all()
400                    .build()
401                {
402                    Ok(runtime) => runtime,
403                    Err(err) => {
404                        let _ = tx.send(Err(DeepSeekError::decode(err.to_string(), String::new())));
405                        return;
406                    }
407                };
408
409                runtime.block_on(async move {
410                    match self.stream().await {
411                        Ok(mut stream_rx) => {
412                            while let Some(item) = stream_rx.recv().await {
413                                if tx.send(item).is_err() {
414                                    break;
415                                }
416                            }
417                        }
418                        Err(err) => {
419                            let _ = tx.send(Err(err));
420                        }
421                    }
422                });
423            });
424
425            Ok(ChatStreamBlocking { rx })
426        }
427    }
428}
429
#[cfg(test)]
mod tests {
    use super::request::*;
    use crate::{DEFAULT_BETA_BASE_URL, DeepSeekClient, DeepSeekRequest, chat::request::Thinking};

    /// Builds a client for the beta base URL, reading the API key from the
    /// `DEEPSEEK_API` environment variable (panics if it is not set).
    fn get_client() -> DeepSeekClient {
        let api_key = std::env::var("DEEPSEEK_API").expect("DEEPSEEK_API is not set");
        DeepSeekClient::new(api_key, DEFAULT_BETA_BASE_URL.clone())
    }

    /// Returns a builder preconfigured with the client, model, a 32-token
    /// output limit and thinking disabled.
    fn get_builder() -> BetaChatRequestBuilder {
        let builder = BetaChatRequestBuilder::default().client(get_client());
        builder
            .model("deepseek-v4-flash")
            .max_tokens(32_u32)
            .thinking(Thinking::disabled())
    }

    /// Sends a prefix-completion request: the assistant message opens a Python
    /// code fence and generation stops at the closing fence.
    #[tokio::test]
    async fn beta_chat() {
        let question = BetaChatMessage::User {
            content: String::from("Please write quick sort code"),
            name: None,
        };
        let code_fence_prefix = BetaChatMessage::Assistant {
            content: Some(String::from("```python\n")),
            name: None,
            prefix: true,
            reasoning_content: None,
            tool_calls: None,
        };

        let request = get_builder()
            .message(question)
            .message(code_fence_prefix)
            .stop("```")
            .build()
            .unwrap();

        let response = request.send().await.unwrap();
        println!("{response:#?}");
    }
}
470}