deepseek_sdk/completion/chat.rs
1//! Beta chat completion request types.
2//!
3//! The beta endpoint enables prefix/continuation behavior for assistant messages.
4pub mod request {
5 use crate::DeepSeekError;
6 use crate::chat::request::{
7 ReasoningEffort, ResponseFormat, Stop, StreamOptions, Thinking, ThinkingType, ToolChoice,
8 ToolType, is_none_or_empty_stop,
9 };
10 use crate::chat::response::ToolCall;
11 use crate::chat::{Chat, ChatStream, ChatStreamBlocking, ChatStreamItem, is_none_or_empty_vec};
12 use crate::{DeepSeekClient, DeepSeekRequest, api_post, api_request_stream};
13 use derive_builder::Builder;
14 use futures_util::StreamExt;
15 use reqwest::Method;
16 use reqwest_eventsource::Event;
17 use serde::{Deserialize, Serialize};
18 use std::sync::mpsc as std_mpsc;
19 use tokio::sync::mpsc;
20
/// Serde `skip_serializing_if` predicate: returns `true` when the flag is `false`.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
24
25 /// Beta chat request payload (beta base URL required).
26 #[derive(Clone, Debug, Serialize, Builder)]
27 #[builder(
28 pattern = "owned",
29 setter(into, strip_option),
30 build_fn(validate = "Self::validate"),
31 name = "BetaChatRequestBuilder"
32 )]
33 pub struct BetaChatRequest {
34 #[serde(skip_serializing)]
35 pub client: DeepSeekClient,
36
37 /// A list of messages comprising the conversation so far.
38 #[builder(setter(each(name = "message", into)))]
39 pub messages: Vec<BetaChatMessage>,
40
41 /// Possible values: [`deepseek-v4-flash`, `deepseek-v4-pro`]
42 ///
43 /// ID of the model to use.
44 pub model: String,
45 /// 推理开关对象:{"type": "enabled" | "disabled"}。
46 #[builder(default)]
47 #[serde(skip_serializing_if = "Option::is_none")]
48
49 /// Controls the switch between thinking and non-thinking mode.
50 pub thinking: Option<Thinking>,
51
52 /// Possible values: [`high`, `max`]
53 ///
54 /// Controls the reasoning effort of the model.
55 /// The default effort is `high` for regular requests;
56 /// for some complex agent requests (such as Claude Code, OpenCode),
57 /// effort is automatically set to `max`.
58 /// For compatibility, `low` and `medium` are mapped to `high`,
59 /// and `xhigh` is mapped to `max`.
60 #[builder(default)]
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub reasoning_effort: Option<ReasoningEffort>,
63
64 /// The maximum number of tokens that can be generated in the chat completion.
65 ///
66 /// The total length of input tokens and generated tokens is limited by the model's context length.
67 ///
68 /// For the value range and default value, please refer to the [documentation](https://api-docs.deepseek.com/quick_start/pricing).
69 #[builder(default)]
70 #[serde(skip_serializing_if = "Option::is_none")]
71 pub max_tokens: Option<u32>,
72
73 /// An object specifying the format that the model must output.
74 /// Setting to { "type": "json_object" } enables JSON Output,
75 /// which guarantees the message the model generates is valid JSON.
76 ///
77 /// **Important**: When using JSON Output, you must also instruct the model to produce JSON yourself via a system or user message.
78 /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length.
79 #[builder(default)]
80 #[serde(skip_serializing_if = "Option::is_none")]
81 pub response_format: Option<ResponseFormat>,
82
83 /// Up to 16 sequences where the API will stop generating further tokens.
84 #[builder(default)]
85 #[serde(skip_serializing_if = "is_none_or_empty_stop")]
86 pub stop: Option<Stop>,
87
88 /// If set, partial message deltas will be sent.
89 /// Tokens will be sent as data-only server-sent events (SSE) as they become available,
90 /// with the stream terminated by a `data: [DONE]`` message.
91 #[builder(default)]
92 #[serde(skip_serializing_if = "Option::is_none")]
93 pub stream: Option<bool>,
94
95 /// Options for streaming response. Only set this when you set `stream: true`.
96 #[builder(default)]
97 #[serde(skip_serializing_if = "Option::is_none")]
98 pub stream_options: Option<StreamOptions>,
99
100 /// Possible values: `<= 2`
101 ///
102 /// Default value: `1`
103 ///
104 /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
105 /// We generally recommend altering this or `top_p` but not both.
106 #[builder(default)]
107 #[serde(skip_serializing_if = "Option::is_none")]
108 pub temperature: Option<f64>,
109
110 /// Possible values: `<= 1`
111 ///
112 /// Default value: `1`
113 ///
114 /// An alternative to sampling with temperature, called nucleus sampling,
115 /// where the model considers the results of the tokens with top_p probability mass.
116 /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
117 ///
118 /// We generally recommend altering this or `temperature` but not both.
119 #[builder(default)]
120 #[serde(skip_serializing_if = "Option::is_none")]
121 pub top_p: Option<f64>,
122
123 /// A list of tools the model may call. Currently, only functions are supported as a tool.
124 /// Use this to provide a list of functions the model may generate JSON inputs for.
125 /// A max of 128 functions are supported.
126 #[builder(default, setter(each(name = "tool", into)))]
127 #[serde(skip_serializing_if = "Vec::is_empty")]
128 pub tools: Vec<Tool>,
129
130 /// Controls which (if any) tool is called by the model.
131 /// `none` means the model will not call any tool and instead generates a message.
132 /// `auto` means the model can pick between generating a message or calling one or more tools.
133 /// `required` means the model must call one or more tools.
134 /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
135 /// `none` is the default when no tools are present. `auto` is the default if tools are present.
136 #[builder(default)]
137 #[serde(skip_serializing_if = "Option::is_none")]
138 pub tool_choice: Option<ToolChoice>,
139
140 /// Whether to return log probabilities of the output tokens or not.
141 /// If true, returns the log probabilities of each output token returned in the `content` of `message`.
142 #[builder(default)]
143 #[serde(skip_serializing_if = "Option::is_none")]
144 pub logprobs: Option<bool>,
145
146 /// Possible values: `<= 20`
147 ///
148 /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
149 /// each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.
150 #[builder(default)]
151 #[serde(skip_serializing_if = "Option::is_none")]
152 pub top_logprobs: Option<u32>,
153
154 /// A custom `user_id`. Allowed character set is `[a-zA-Z0-9\-_]`, with a maximum length of 512.
155 /// Do not include user privacy information in the `user_id`.
156
157 /// `user_id` can be used to distinguish user identities on your side to help us with content safety review.
158 /// `user_id` can be used for KVCache isolation for privacy management.
159 /// `user_id` can be used for scheduling isolation of users on your business side.
160 /// For more details on the `user_id` parameter, please refer to [Rate Limit & Isolation](https://api-docs.deepseek.com/quick_start/rate_limit)
161 #[builder(default)]
162 #[serde(skip_serializing_if = "Option::is_none")]
163 pub user_id: Option<String>,
164 }
165 /// Beta chat message variants.
166 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
167 #[serde(tag = "role", rename_all = "snake_case")]
168 pub enum BetaChatMessage {
169 System {
170 /// The contents of the system message.
171 content: String,
172 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
173 #[serde(skip_serializing_if = "Option::is_none")]
174 name: Option<String>,
175 },
176 User {
177 /// The contents of the user message.
178 content: String,
179 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
180 #[serde(skip_serializing_if = "Option::is_none")]
181 name: Option<String>,
182 },
183 Assistant {
184 /// The contents of the assistant message.
185 #[serde(skip_serializing_if = "Option::is_none")]
186 content: Option<String>,
187 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
188 #[serde(skip_serializing_if = "Option::is_none")]
189 name: Option<String>,
190 /// (Beta) Set this to `true` to force the model to start its answer by the content of the supplied prefix in this `assistant` message.
191 /// You must set `base_url="https://api.deepseek.com/beta"` to use this feature.
192 #[serde(default, skip_serializing_if = "is_false")]
193 prefix: bool,
194 /// (Beta) Used for the thinking mode in the [Chat Prefix Completion](https://api-docs.deepseek.com/guides/chat_prefix_completion)
195 /// feature as the input for the CoT in the last assistant message.
196 /// When using this feature, the `prefix` parameter must be set to `true`.
197 #[serde(skip_serializing_if = "Option::is_none")]
198 reasoning_content: Option<String>,
199 #[serde(skip_serializing_if = "is_none_or_empty_vec")]
200 tool_calls: Option<Vec<ToolCall>>,
201 },
202 Tool {
203 /// The contents of the tool message.
204 content: String,
205 /// Tool call that this message is responding to.
206 tool_call_id: String,
207 },
208 }
209 /// Tool definition for beta chat requests.
210 #[derive(Clone, Debug, PartialEq, Eq, Serialize)]
211 pub struct Tool {
212 #[serde(rename = "type")]
213 pub typ: ToolType,
214 pub function: BetaToolFunctionDefinition,
215 }
216
217 impl Tool {
218 pub fn new(
219 name: impl Into<String>,
220 description: impl Into<String>,
221 parameters: Option<serde_json::Value>,
222 strict: Option<bool>,
223 ) -> Self {
224 Tool {
225 typ: ToolType::Function,
226 function: BetaToolFunctionDefinition {
227 name: name.into(),
228 description: description.into(),
229 parameters,
230 strict,
231 },
232 }
233 }
234 }
235 /// Tool function definition for beta chat requests.
236 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
237 pub struct BetaToolFunctionDefinition {
238 pub description: String,
239 pub name: String,
240 #[serde(skip_serializing_if = "Option::is_none")]
241 pub parameters: Option<serde_json::Value>,
242 /// (Beta) Default value: `false`
243 ///
244 /// If set to true, the API will use strict-mode for the tool calls to ensure the output always complies with the function's JSON schema.
245 /// This is a Beta feature, for more details please refer to [Tool Calls Guide](https://api-docs.deepseek.com/zh-cn/guides/tool_calls)
246 pub strict: Option<bool>,
247 }
248
249 impl BetaChatRequestBuilder {
250 fn validate(&self) -> Result<(), String> {
251 // derive_builder + strip_option makes Option<T> fields become Option<Option<T>> here;
252 // flatten() treats "unset" and "explicit None" uniformly for validation.
253 if let Some(temperature) = self.temperature.flatten() {
254 if !(0.0..=2.0).contains(&temperature) {
255 return Err("temperature must be between 0 and 2".to_string());
256 }
257 }
258
259 if let Some(top_p) = self.top_p.flatten() {
260 if !(0.0..=1.0).contains(&top_p) {
261 return Err("top_p must be between 0 and 1".to_string());
262 }
263 }
264
265 if let Some(top_logprobs) = self.top_logprobs.flatten() {
266 if top_logprobs > 20 {
267 return Err("top_logprobs must be <= 20".to_string());
268 }
269 if self.logprobs.flatten() != Some(true) {
270 return Err("top_logprobs requires logprobs=true".to_string());
271 }
272 }
273
274 if let Some(thinking) = self
275 .thinking
276 .as_ref()
277 .and_then(|thinking| thinking.as_ref())
278 {
279 if let Some(reasoning_effort) = self
280 .reasoning_effort
281 .as_ref()
282 .and_then(|effort| effort.as_ref())
283 {
284 if matches!(thinking.typ, ThinkingType::Disabled)
285 && matches!(
286 reasoning_effort,
287 ReasoningEffort::High | ReasoningEffort::Max
288 )
289 {
290 return Err(
291 "thinking options type cannot be disabled when reasoning_effort is set"
292 .to_string(),
293 );
294 }
295 }
296 }
297
298 if let Some(stream) = self.stream.flatten() {
299 if !stream && self.stream_options.is_some() {
300 return Err("stream_options cannot be set when stream is false".to_string());
301 }
302 }
303
304 if let Some(messages) = self.messages.as_ref() {
305 messages.iter().try_for_each(|message| {
306 if let BetaChatMessage::Assistant {
307 prefix: false,
308 reasoning_content: Some(_),
309 ..
310 } = message
311 {
312 return Err(
313 "reasoning_content cannot be set when assistant message prefix is false".to_string(),
314 );
315 }
316 Ok(())
317 })?;
318 }
319
320 if let Some(stop) = self.stop.as_ref().and_then(|s| s.as_ref()) {
321 if let Stop::Many(values) = stop {
322 if values.len() > 16 {
323 return Err("a maximum of 16 stop sequences are allowed".to_string());
324 }
325 }
326 }
327 Ok(())
328 }
329 }
330
331 impl DeepSeekRequest for BetaChatRequest {
332 type Response = Chat;
333 type StreamItem = ChatStreamItem;
334 type BlockingStream = ChatStreamBlocking;
335
336 async fn send(self) -> Result<Chat, DeepSeekError> {
337 let client = self.client.clone();
338 api_post("/chat/completions", &self, client).await
339 }
340
341 async fn stream(self) -> Result<mpsc::Receiver<ChatStreamItem>, DeepSeekError> {
342 let mut request = self;
343 request.stream = Some(true);
344
345 let client = request.client.clone();
346 let mut event_source = api_request_stream(
347 Method::POST,
348 "/chat/completions",
349 |builder| builder.json(&request),
350 client,
351 )
352 .await?;
353
354 let (tx, rx) = mpsc::channel(32);
355
356 tokio::spawn(async move {
357 while let Some(event) = event_source.next().await {
358 match event {
359 Ok(Event::Open) => {}
360 Ok(Event::Message(message)) => {
361 if message.data == "[DONE]" {
362 break;
363 }
364 match serde_json::from_str::<ChatStream>(&message.data) {
365 Ok(chunk) => {
366 if tx.send(Ok(chunk)).await.is_err() {
367 break;
368 }
369 }
370 Err(err) => {
371 let _ = tx
372 .send(Err(DeepSeekError::decode(
373 err.to_string(),
374 message.data,
375 )))
376 .await;
377 break;
378 }
379 }
380 }
381 Err(err) => {
382 let _ = tx
383 .send(Err(DeepSeekError::decode(err.to_string(), String::new())))
384 .await;
385 break;
386 }
387 }
388 }
389 });
390
391 Ok(rx)
392 }
393
394 fn stream_blocking(self) -> Result<ChatStreamBlocking, DeepSeekError> {
395 let (tx, rx) = std_mpsc::channel();
396
397 std::thread::spawn(move || {
398 let runtime = match tokio::runtime::Builder::new_current_thread()
399 .enable_all()
400 .build()
401 {
402 Ok(runtime) => runtime,
403 Err(err) => {
404 let _ = tx.send(Err(DeepSeekError::decode(err.to_string(), String::new())));
405 return;
406 }
407 };
408
409 runtime.block_on(async move {
410 match self.stream().await {
411 Ok(mut stream_rx) => {
412 while let Some(item) = stream_rx.recv().await {
413 if tx.send(item).is_err() {
414 break;
415 }
416 }
417 }
418 Err(err) => {
419 let _ = tx.send(Err(err));
420 }
421 }
422 });
423 });
424
425 Ok(ChatStreamBlocking { rx })
426 }
427 }
428}
429
#[cfg(test)]
mod tests {
    use super::request::*;
    use crate::{DEFAULT_BETA_BASE_URL, DeepSeekClient, DeepSeekRequest, chat::request::Thinking};

    /// Creates a client for the beta base URL from the `DEEPSEEK_API` environment variable.
    ///
    /// Panics when the variable is not set.
    fn get_client() -> DeepSeekClient {
        let deepseek_api = std::env::var("DEEPSEEK_API").expect("DEEPSEEK_API is not set");
        DeepSeekClient::new(deepseek_api, DEFAULT_BETA_BASE_URL.clone())
    }

    /// Builder pre-filled with the settings shared by the tests in this module:
    /// the `deepseek-v4-flash` model, a 32-token cap and thinking disabled.
    fn get_builder() -> BetaChatRequestBuilder {
        let client = get_client();
        BetaChatRequestBuilder::default()
            .client(client)
            .model("deepseek-v4-flash")
            .max_tokens(32_u32)
            .thinking(Thinking::disabled())
    }

    /// Sends a prefix-completion request (assistant message with `prefix: true`) to the
    /// live API and prints the response. Requires `DEEPSEEK_API` and network access.
    #[tokio::test]
    async fn beta_chat() {
        let builder = get_builder();

        let question = BetaChatMessage::User {
            content: String::from("Please write quick sort code"),
            name: None,
        };
        let code_prefix = BetaChatMessage::Assistant {
            content: Some(String::from("```python\n")),
            name: None,
            prefix: true,
            reasoning_content: None,
            tool_calls: None,
        };

        let request = builder
            .message(question)
            .message(code_prefix)
            .stop("```")
            .build()
            .unwrap();

        let response = request.send().await.unwrap();
        println!("{response:#?}");
    }
}