deepseek_sdk/completion/chat.rs
1//! Beta chat completion request types.
2//!
3//! The beta endpoint enables prefix/continuation behavior for assistant messages.
4pub mod request {
5 use crate::DeepSeekError;
6 use crate::chat::request::{
7 ReasoningEffort, ResponseFormat, Stop, StreamOptions, Thinking, ToolChoice,
8 ToolType, is_none_or_empty_stop,
9 };
10 use crate::chat::response::ToolCall;
11 use crate::chat::{Chat, ChatStream, ChatStreamBlocking, ChatStreamItem, is_none_or_empty_vec};
12 use crate::{DeepSeekClient, DeepSeekRequest, api_post, api_request_stream};
13 use derive_builder::Builder;
14 use futures_util::StreamExt;
15 use reqwest::Method;
16 use reqwest_eventsource::Event;
17 use serde::{Deserialize, Serialize};
18 use std::sync::mpsc as std_mpsc;
19 use tokio::sync::mpsc;
20
/// Returns `true` when the referenced flag is `false`.
///
/// Used as a `skip_serializing_if` predicate, which is why the flag is taken
/// by reference rather than by value.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
24
25 /// Beta chat request payload (beta base URL required).
26 #[derive(Clone, Debug, PartialEq, Serialize, Builder)]
27 #[builder(
28 pattern = "owned",
29 setter(into, strip_option),
30 build_fn(validate = "Self::validate"),
31 name = "BetaChatRequestBuilder"
32 )]
33 pub struct BetaChatRequest {
34 #[serde(skip_serializing)]
35 pub client: DeepSeekClient,
36
37 /// A list of messages comprising the conversation so far.
38 #[builder(setter(each(name = "message", into)))]
39 pub messages: Vec<BetaChatMessage>,
40
41 /// Possible values: [`deepseek-v4-flash`, `deepseek-v4-pro`]
42 ///
43 /// ID of the model to use.
44 pub model: String,
45
46 /// Controls the switch between thinking and non-thinking mode.
47 #[builder(default)]
48 #[serde(skip_serializing_if = "Option::is_none")]
49 pub thinking: Option<Thinking>,
50
51 /// Possible values: [`high`, `max`]
52 ///
53 /// Controls the reasoning effort of the model.
54 /// The default effort is `high` for regular requests;
55 /// for some complex agent requests (such as Claude Code, OpenCode),
56 /// effort is automatically set to `max`.
57 /// For compatibility, `low` and `medium` are mapped to `high`,
58 /// and `xhigh` is mapped to `max`.
59 #[builder(default)]
60 #[serde(skip_serializing_if = "Option::is_none")]
61 pub reasoning_effort: Option<ReasoningEffort>,
62
63 /// The maximum number of tokens that can be generated in the chat completion.
64 ///
65 /// The total length of input tokens and generated tokens is limited by the model's context length.
66 ///
67 /// For the value range and default value, please refer to the [documentation](https://api-docs.deepseek.com/quick_start/pricing).
68 #[builder(default)]
69 #[serde(skip_serializing_if = "Option::is_none")]
70 pub max_tokens: Option<u32>,
71
72 /// An object specifying the format that the model must output.
73 /// Setting to { "type": "json_object" } enables JSON Output,
74 /// which guarantees the message the model generates is valid JSON.
75 ///
76 /// **Important**: When using JSON Output, you must also instruct the model to produce JSON yourself via a system or user message.
77 /// Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length.
78 #[builder(default)]
79 #[serde(skip_serializing_if = "Option::is_none")]
80 pub response_format: Option<ResponseFormat>,
81
82 /// Up to 16 sequences where the API will stop generating further tokens.
83 #[builder(default)]
84 #[serde(skip_serializing_if = "is_none_or_empty_stop")]
85 pub stop: Option<Stop>,
86
87 /// If set, partial message deltas will be sent.
88 /// Tokens will be sent as data-only server-sent events (SSE) as they become available,
89 /// with the stream terminated by a `data: [DONE]`` message.
90 #[builder(default)]
91 #[serde(skip_serializing_if = "Option::is_none")]
92 pub stream: Option<bool>,
93
94 /// Options for streaming response. Only set this when you set `stream: true`.
95 #[builder(default)]
96 #[serde(skip_serializing_if = "Option::is_none")]
97 pub stream_options: Option<StreamOptions>,
98
99 /// Possible values: `<= 2`
100 ///
101 /// Default value: `1`
102 ///
103 /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
104 /// We generally recommend altering this or `top_p` but not both.
105 #[builder(default)]
106 #[serde(skip_serializing_if = "Option::is_none")]
107 pub temperature: Option<f64>,
108
109 /// Possible values: `<= 1`
110 ///
111 /// Default value: `1`
112 ///
113 /// An alternative to sampling with temperature, called nucleus sampling,
114 /// where the model considers the results of the tokens with top_p probability mass.
115 /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
116 ///
117 /// We generally recommend altering this or `temperature` but not both.
118 #[builder(default)]
119 #[serde(skip_serializing_if = "Option::is_none")]
120 pub top_p: Option<f64>,
121
122 /// A list of tools the model may call. Currently, only functions are supported as a tool.
123 /// Use this to provide a list of functions the model may generate JSON inputs for.
124 /// A max of 128 functions are supported.
125 #[builder(default, setter(each(name = "tool", into)))]
126 #[serde(skip_serializing_if = "Vec::is_empty")]
127 pub tools: Vec<Tool>,
128
129 /// Controls which (if any) tool is called by the model.
130 /// `none` means the model will not call any tool and instead generates a message.
131 /// `auto` means the model can pick between generating a message or calling one or more tools.
132 /// `required` means the model must call one or more tools.
133 /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
134 /// `none` is the default when no tools are present. `auto` is the default if tools are present.
135 #[builder(default)]
136 #[serde(skip_serializing_if = "Option::is_none")]
137 pub tool_choice: Option<ToolChoice>,
138
139 /// Whether to return log probabilities of the output tokens or not.
140 /// If true, returns the log probabilities of each output token returned in the `content` of `message`.
141 #[builder(default)]
142 #[serde(skip_serializing_if = "Option::is_none")]
143 pub logprobs: Option<bool>,
144
145 /// Possible values: `<= 20`
146 ///
147 /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position,
148 /// each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.
149 #[builder(default)]
150 #[serde(skip_serializing_if = "Option::is_none")]
151 pub top_logprobs: Option<u32>,
152
153 /// A custom `user_id`. Allowed character set is `[a-zA-Z0-9\-_]`, with a maximum length of 512.
154 /// Do not include user privacy information in the `user_id`.
155
156 /// `user_id` can be used to distinguish user identities on your side to help us with content safety review.
157 /// `user_id` can be used for KVCache isolation for privacy management.
158 /// `user_id` can be used for scheduling isolation of users on your business side.
159 /// For more details on the `user_id` parameter, please refer to [Rate Limit & Isolation](https://api-docs.deepseek.com/quick_start/rate_limit)
160 #[builder(default)]
161 #[serde(skip_serializing_if = "Option::is_none")]
162 pub user_id: Option<String>,
163 }
164 /// Beta chat message variants.
165 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
166 #[serde(tag = "role", rename_all = "snake_case")]
167 pub enum BetaChatMessage {
168 System {
169 /// The contents of the system message.
170 content: String,
171 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
172 #[serde(skip_serializing_if = "Option::is_none")]
173 name: Option<String>,
174 },
175 User {
176 /// The contents of the user message.
177 content: String,
178 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
179 #[serde(skip_serializing_if = "Option::is_none")]
180 name: Option<String>,
181 },
182 Assistant {
183 /// The contents of the assistant message.
184 #[serde(skip_serializing_if = "Option::is_none")]
185 content: Option<String>,
186 /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
187 #[serde(skip_serializing_if = "Option::is_none")]
188 name: Option<String>,
189 /// (Beta) Set this to `true` to force the model to start its answer by the content of the supplied prefix in this `assistant` message.
190 /// You must set `base_url="https://api.deepseek.com/beta"` to use this feature.
191 #[serde(default, skip_serializing_if = "is_false")]
192 prefix: bool,
193 /// (Beta) Used for the thinking mode in the [Chat Prefix Completion](https://api-docs.deepseek.com/guides/chat_prefix_completion)
194 /// feature as the input for the CoT in the last assistant message.
195 /// When using this feature, the `prefix` parameter must be set to `true`.
196 #[serde(skip_serializing_if = "Option::is_none")]
197 reasoning_content: Option<String>,
198 #[serde(skip_serializing_if = "is_none_or_empty_vec")]
199 tool_calls: Option<Vec<ToolCall>>,
200 },
201 Tool {
202 /// The contents of the tool message.
203 content: String,
204 /// Tool call that this message is responding to.
205 tool_call_id: String,
206 },
207 }
208 /// Tool definition for beta chat requests.
209 #[derive(Clone, Debug, PartialEq, Eq, Serialize)]
210 pub struct Tool {
211 #[serde(rename = "type")]
212 pub typ: ToolType,
213 pub function: BetaToolFunctionDefinition,
214 }
215
216 impl Tool {
217 pub fn new(
218 name: impl Into<String>,
219 description: impl Into<String>,
220 parameters: Option<serde_json::Value>,
221 strict: Option<bool>,
222 ) -> Self {
223 Tool {
224 typ: ToolType::Function,
225 function: BetaToolFunctionDefinition {
226 name: name.into(),
227 description: description.into(),
228 parameters,
229 strict,
230 },
231 }
232 }
233 }
234 /// Tool function definition for beta chat requests.
235 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
236 pub struct BetaToolFunctionDefinition {
237 pub description: String,
238 pub name: String,
239 #[serde(skip_serializing_if = "Option::is_none")]
240 pub parameters: Option<serde_json::Value>,
241 /// (Beta) Default value: `false`
242 ///
243 /// If set to true, the API will use strict-mode for the tool calls to ensure the output always complies with the function's JSON schema.
244 /// This is a Beta feature, for more details please refer to [Tool Calls Guide](https://api-docs.deepseek.com/zh-cn/guides/tool_calls)
245 #[serde(skip_serializing_if = "Option::is_none")]
246 pub strict: Option<bool>,
247 }
248
249 impl BetaChatRequestBuilder {
250 fn validate(&self) -> Result<(), String> {
251 // derive_builder + strip_option makes Option<T> fields become Option<Option<T>> here;
252 // flatten() treats "unset" and "explicit None" uniformly for validation.
253 if let Some(temperature) = self.temperature.flatten()
254 && !(0.0..=2.0).contains(&temperature) {
255 return Err("temperature must be between 0 and 2".to_string());
256 }
257
258 if let Some(top_p) = self.top_p.flatten()
259 && !(0.0..=1.0).contains(&top_p) {
260 return Err("top_p must be between 0 and 1".to_string());
261 }
262
263 if let Some(top_logprobs) = self.top_logprobs.flatten() {
264 if top_logprobs > 20 {
265 return Err("top_logprobs must be <= 20".to_string());
266 }
267 if self.logprobs.flatten() != Some(true) {
268 return Err("top_logprobs requires logprobs=true".to_string());
269 }
270 }
271
272 if let Some(stream) = self.stream.flatten()
273 && !stream && self.stream_options.is_some() {
274 return Err("stream_options cannot be set when stream is false".to_string());
275 }
276
277 if let Some(messages) = self.messages.as_ref() {
278 messages.iter().try_for_each(|message| {
279 if let BetaChatMessage::Assistant {
280 prefix: false,
281 reasoning_content: Some(_),
282 ..
283 } = message
284 {
285 return Err(
286 "reasoning_content cannot be set when assistant message prefix is false".to_string(),
287 );
288 }
289 Ok(())
290 })?;
291 }
292
293 if let Some(stop) = self.stop.as_ref().and_then(|s| s.as_ref())
294 && let Stop::Many(values) = stop
295 && values.len() > 16 {
296 return Err("a maximum of 16 stop sequences are allowed".to_string());
297 }
298
299 if let Some(user_id) = self.user_id.as_ref().and_then(|u| u.as_ref()) {
300 if user_id.len() > 512 {
301 return Err("user_id must be at most 512 characters".to_string());
302 }
303 if !user_id
304 .chars()
305 .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
306 {
307 return Err(
308 "user_id must only contain [a-zA-Z0-9\\-_]".to_string(),
309 );
310 }
311 }
312
313 Ok(())
314 }
315 }
316
317 impl DeepSeekRequest for BetaChatRequest {
318 type Response = Chat;
319 type StreamItem = ChatStreamItem;
320 type BlockingStream = ChatStreamBlocking;
321
322 async fn send(self) -> Result<Chat, DeepSeekError> {
323 let client = self.client.clone();
324 api_post("/chat/completions", &self, client).await
325 }
326
327 async fn stream(self) -> Result<mpsc::Receiver<ChatStreamItem>, DeepSeekError> {
328 let mut request = self;
329 request.stream = Some(true);
330
331 let client = request.client.clone();
332 let mut event_source = api_request_stream(
333 Method::POST,
334 "/chat/completions",
335 |builder| builder.json(&request),
336 client,
337 )
338 .await?;
339
340 let (tx, rx) = mpsc::channel(32);
341
342 tokio::spawn(async move {
343 while let Some(event) = event_source.next().await {
344 match event {
345 Ok(Event::Open) => {}
346 Ok(Event::Message(message)) => {
347 if message.data == "[DONE]" {
348 break;
349 }
350 match serde_json::from_str::<ChatStream>(&message.data) {
351 Ok(chunk) => {
352 if tx.send(Ok(chunk)).await.is_err() {
353 break;
354 }
355 }
356 Err(err) => {
357 let _ = tx
358 .send(Err(DeepSeekError::decode(
359 err.to_string(),
360 message.data,
361 )))
362 .await;
363 break;
364 }
365 }
366 }
367 Err(err) => {
368 let _ = tx
369 .send(Err(DeepSeekError::decode(err.to_string(), String::new())))
370 .await;
371 break;
372 }
373 }
374 }
375 });
376
377 Ok(rx)
378 }
379
380 fn stream_blocking(self) -> Result<ChatStreamBlocking, DeepSeekError> {
381 let (tx, rx) = std_mpsc::channel();
382
383 std::thread::spawn(move || {
384 let runtime = match tokio::runtime::Builder::new_current_thread()
385 .enable_all()
386 .build()
387 {
388 Ok(runtime) => runtime,
389 Err(err) => {
390 let _ = tx.send(Err(DeepSeekError::decode(err.to_string(), String::new())));
391 return;
392 }
393 };
394
395 runtime.block_on(async move {
396 match self.stream().await {
397 Ok(mut stream_rx) => {
398 while let Some(item) = stream_rx.recv().await {
399 if tx.send(item).is_err() {
400 break;
401 }
402 }
403 }
404 Err(err) => {
405 let _ = tx.send(Err(err));
406 }
407 }
408 });
409 });
410
411 Ok(ChatStreamBlocking { rx })
412 }
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::request::*;
    use crate::{DEFAULT_BETA_BASE_URL, DeepSeekClient, DeepSeekRequest, chat::request::Thinking};

    /// Creates a client for the beta base URL using the key in the `DEEPSEEK_API`
    /// environment variable; panics when the variable is missing.
    fn get_client() -> DeepSeekClient {
        let api_key = std::env::var("DEEPSEEK_API").expect("DEEPSEEK_API is not set");
        DeepSeekClient::new(api_key, DEFAULT_BETA_BASE_URL.clone())
    }

    /// Returns a builder preconfigured with the test client, model, a small token
    /// budget and thinking disabled.
    fn get_builder() -> BetaChatRequestBuilder {
        let client = get_client();
        BetaChatRequestBuilder::default()
            .client(client)
            .model("deepseek-v4-flash")
            .max_tokens(32_u32)
            .thinking(Thinking::disabled())
    }

    /// Sends a prefix-completion request and prints the response.
    /// Requires the `DEEPSEEK_API` environment variable to be set.
    #[tokio::test]
    async fn beta_chat() {
        let builder = get_builder();

        let question = BetaChatMessage::User {
            content: "Please write quick sort code".to_string(),
            name: None,
        };
        // The assistant prefix makes the answer start inside a Python code fence.
        let code_prefix = BetaChatMessage::Assistant {
            content: Some("```python\n".to_string()),
            name: None,
            prefix: true,
            reasoning_content: None,
            tool_calls: None,
        };

        let request = builder
            .message(question)
            .message(code_prefix)
            .stop("```")
            .build()
            .unwrap();

        let response = request.send().await.unwrap();
        println!("{:#?}", response);
    }
}
456}