llm_tokenizer/encoders/
deepseek_v4.rs

1// Ported from https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash/blob/main/encoding/encoding_dsv4.py
2
3use std::fmt::Write as _;
4
5use serde_json::{json, Value};
6use thiserror::Error;
7
8// Reuse the public ThinkingMode enum from the V3.2 module to keep the
9// "thinking" / "chat" mode invariant identical across DeepSeek versions.
10pub use super::deepseek_v32::ThinkingMode;
11
/// How much reasoning effort the V4 prompt prefix should request.
///
/// Corresponds to the Python `reasoning_effort` argument, whose accepted
/// values are `None`, `"high"` and `"max"`. Callers express the `None` case
/// as `Option::<ReasoningEffort>::None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReasoningEffort {
    /// Accepted for signature parity with Python; emits no prefix today.
    High,
    /// Emits the maximum-effort instruction prefix at the start of the prompt.
    Max,
}
22
23/// Parameters for [`encode_messages`].
24///
25/// `context` is intentionally omitted: SMG always renders from scratch, so
26/// the Python default of `context=None` always applies.
27#[derive(Debug, Clone, Copy)]
28pub struct EncodeParams {
29    pub add_default_bos_token: bool,
30    pub drop_thinking: bool,
31    pub reasoning_effort: Option<ReasoningEffort>,
32}
33impl Default for EncodeParams {
34    fn default() -> Self {
35        Self {
36            add_default_bos_token: true,
37            drop_thinking: true,
38            reasoning_effort: None,
39        }
40    }
41}
42
43/// Errors raised when a message list is malformed.
44///
45/// V4-local error type. The variants overlap with V3.2 but are kept
46/// independent so each encoder file is a standalone translation of its
47/// Python source.
48#[derive(Debug, Error)]
49pub enum DsEncodingError {
50    #[error("Index {index} out of range for messages list of length {len}")]
51    IndexOutOfRange { index: usize, len: usize },
52    #[error("Invalid message for role `{role}`: {msg}")]
53    InvalidMessage { role: String, msg: String },
54    #[error("Unknown role: {0}")]
55    UnknownRole(String),
56    #[error("DeepSeek V4 merges tool messages into user; preprocess via merge_tool_messages first (got tool message at index {0})")]
57    UnmergedToolRole(usize),
58    #[error(
59        "Invalid task `{0}`. Valid tasks are: action, query, authority, domain, title, read_url"
60    )]
61    InvalidTask(String),
62}
63
64// ---------------------------------------------------------------------------
65// Special-token constants — copied verbatim from the Python source.
66// ---------------------------------------------------------------------------
/// Token placed at the very start of a prompt.
pub const BOS_TOKEN: &str = "<｜begin▁of▁sentence｜>";
/// Token that terminates an assistant turn.
pub const EOS_TOKEN: &str = "<｜end▁of▁sentence｜>";
/// Opens a reasoning section.
pub const THINKING_START_TOKEN: &str = "<think>";
/// Closes a reasoning section.
pub const THINKING_END_TOKEN: &str = "</think>";
/// Marker that prefixes every DSML tag name (`invoke`, `parameter`, ...).
pub const DSML_TOKEN: &str = "｜DSML｜";
/// Introduces user (and developer) content.
const USER_SP_TOKEN: &str = "<｜User｜>";
/// Introduces the assistant's reply.
const ASSISTANT_SP_TOKEN: &str = "<｜Assistant｜>";
/// Introduces a `latest_reminder` message.
const LATEST_REMINDER_SP_TOKEN: &str = "<｜latest_reminder｜>";
/// Name of the DSML block that wraps an assistant's tool invocations.
const TOOL_CALLS_BLOCK_NAME: &str = "tool_calls";
// Special tokens for the quick-instruction tasks, one per supported task.
const TASK_ACTION: &str = "<｜action｜>";
const TASK_QUERY: &str = "<｜query｜>";
const TASK_AUTHORITY: &str = "<｜authority｜>";
const TASK_DOMAIN: &str = "<｜domain｜>";
const TASK_TITLE: &str = "<｜title｜>";
const TASK_READ_URL: &str = "<｜read_url｜>";

/// Pairs each supported task name with its special token.
const TASK_TOKENS: [(&str, &str); 6] = [
    ("action", TASK_ACTION),
    ("query", TASK_QUERY),
    ("authority", TASK_AUTHORITY),
    ("domain", TASK_DOMAIN),
    ("title", TASK_TITLE),
    ("read_url", TASK_READ_URL),
];

/// Look up the special token for `task`.
///
/// The comparison is exact (case-sensitive); `None` is returned for any
/// name that is not one of the six supported tasks.
fn task_sp_token(task: &str) -> Option<&'static str> {
    TASK_TOKENS
        .iter()
        .find(|(name, _)| *name == task)
        .map(|&(_, token)| token)
}
94
95// ---------------------------------------------------------------------------
96// Templates
97// ---------------------------------------------------------------------------
/// Instruction prefix emitted when maximum reasoning effort is requested.
///
/// Each source line below ends in `\n\` so the literal can be wrapped: the
/// trailing backslash drops the line break and the next line's indentation,
/// leaving only the explicit `\n` escapes in the resulting string.
const REASONING_EFFORT_MAX: &str = "Reasoning Effort: Absolute maximum with no shortcuts permitted.\n\
    You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n\
    Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.\n\n";
99
100/// Mirrors V4's `TOOLS_TEMPLATE`. The block name is `tool_calls` (not
101/// `function_calls` like V3.2) and the wording is updated.
102fn render_tools_template(tool_schemas: &str) -> String {
103    let dsml = DSML_TOKEN;
104    let tcb = TOOL_CALLS_BLOCK_NAME;
105    let tstart = THINKING_START_TOKEN;
106    let tend = THINKING_END_TOKEN;
107    format!(
108"## Tools
109
110You have access to a set of tools to help answer the user's question. You can invoke tools by writing a \"<{dsml}{tcb}>\" block like the following:
111
112<{dsml}{tcb}>
113<{dsml}invoke name=\"$TOOL_NAME\">
114<{dsml}parameter name=\"$PARAMETER_NAME\" string=\"true|false\">$PARAMETER_VALUE</{dsml}parameter>
115...
116</{dsml}invoke>
117<{dsml}invoke name=\"$TOOL_NAME2\">
118...
119</{dsml}invoke>
120</{dsml}{tcb}>
121
122String parameters should be specified as is and set `string=\"true\"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string=\"false\"`.
123
124If thinking_mode is enabled (triggered by {tstart}), you MUST output your complete reasoning inside {tstart}...{tend} BEFORE any tool calls or final response.
125
126Otherwise, output directly after {tend} with tool calls or final response.
127
128### Available Tool Schemas
129
130{tool_schemas}
131
132You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
133"
134    )
135}
136
137// ---------------------------------------------------------------------------
138// JSON helpers (mirror V3.2)
139// ---------------------------------------------------------------------------
140fn to_json(value: &Value) -> String {
141    serde_json::to_string(value).unwrap_or_else(|_| "null".to_string())
142}
143fn tools_from_openai_format(tools: &[Value]) -> Vec<Value> {
144    tools
145        .iter()
146        .filter_map(|t| t.get("function").cloned())
147        .collect()
148}
149fn tool_calls_from_openai_format(tool_calls: &[Value]) -> Vec<Value> {
150    tool_calls
151        .iter()
152        .filter_map(|tc| {
153            let f = tc.get("function")?;
154            Some(json!({
155                "name": f.get("name").cloned().unwrap_or(Value::Null),
156                "arguments": f.get("arguments").cloned().unwrap_or(Value::Null),
157            }))
158        })
159        .collect()
160}
161
162/// V4 differs from V3.2: when `arguments` fails to JSON-parse, the upstream
163/// wraps the raw string in `{"arguments": <raw>}` instead of erroring.
164fn encode_arguments_to_dsml(tool_call: &Value) -> String {
165    let arguments_str = tool_call
166        .get("arguments")
167        .and_then(|v| v.as_str())
168        .unwrap_or("{}");
169    let arguments: Value = match serde_json::from_str(arguments_str) {
170        Ok(v) => v,
171        Err(_) => json!({ "arguments": arguments_str }),
172    };
173    let obj = match arguments.as_object() {
174        Some(obj) => obj,
175        None => return String::new(),
176    };
177    let mut parts = Vec::with_capacity(obj.len());
178    for (k, v) in obj {
179        let (is_str, value_str) = match v {
180            Value::String(s) => ("true", s.clone()),
181            other => ("false", to_json(other)),
182        };
183        parts.push(format!(
184            "<{DSML_TOKEN}parameter name=\"{k}\" string=\"{is_str}\">{value_str}</{DSML_TOKEN}parameter>",
185        ));
186    }
187    parts.join("\n")
188}
189
190fn render_tools(tools: &[Value]) -> String {
191    let schemas: Vec<String> = tools.iter().map(to_json).collect();
192    render_tools_template(&schemas.join("\n"))
193}
194fn find_last_user_index(messages: &[Value]) -> Option<usize> {
195    for idx in (0..messages.len()).rev() {
196        let role = messages[idx].get("role").and_then(|v| v.as_str());
197        if matches!(role, Some("user") | Some("developer")) {
198            return Some(idx);
199        }
200    }
201    None
202}
/// Whether `index` is at or past the last user message.
///
/// Returns `true` when there is no user message at all.
fn at_or_after_last_user(index: usize, last_user_idx: Option<usize>) -> bool {
    let Some(last) = last_user_idx else {
        return true;
    };
    index >= last
}
/// Whether `index` is strictly past the last user message.
///
/// Returns `true` when there is no user message at all.
fn after_last_user(index: usize, last_user_idx: Option<usize>) -> bool {
    let Some(last) = last_user_idx else {
        return true;
    };
    index > last
}
215
216// ---------------------------------------------------------------------------
217// render_message — direct port of the V4 Python function with the same name.
218// ---------------------------------------------------------------------------
219#[expect(
220    clippy::too_many_lines,
221    reason = "mirrors the Python render_message function 1:1 for sync-ability"
222)]
223fn render_message(
224    index: usize,
225    messages: &[Value],
226    thinking_mode: ThinkingMode,
227    drop_thinking: bool,
228    reasoning_effort: Option<ReasoningEffort>,
229) -> Result<String, DsEncodingError> {
230    if index >= messages.len() {
231        return Err(DsEncodingError::IndexOutOfRange {
232            index,
233            len: messages.len(),
234        });
235    }
236    let mut prompt = String::new();
237    let msg = &messages[index];
238    let last_user_idx = find_last_user_index(messages);
239
240    let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
241    let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or("");
242    let tools_raw = msg.get("tools").and_then(|v| v.as_array());
243    let response_format = msg.get("response_format");
244    let tool_calls_raw = msg.get("tool_calls").and_then(|v| v.as_array());
245    let reasoning_content = msg
246        .get("reasoning_content")
247        .and_then(|v| v.as_str())
248        .unwrap_or("");
249    let wo_eos = msg.get("wo_eos").and_then(|v| v.as_bool()).unwrap_or(false);
250    let tools_owned = tools_raw.map(|t| tools_from_openai_format(t));
251    let tools = tools_owned.as_deref();
252    let tool_calls_owned = tool_calls_raw.map(|tc| tool_calls_from_openai_format(tc));
253    let tool_calls = tool_calls_owned.as_deref();
254
255    // Reasoning effort prefix (only at index 0 in thinking mode with max effort)
256    if index == 0
257        && thinking_mode == ThinkingMode::Thinking
258        && reasoning_effort == Some(ReasoningEffort::Max)
259    {
260        prompt.push_str(REASONING_EFFORT_MAX);
261    }
262
263    match role {
264        "system" => {
265            prompt.push_str(content);
266            if let Some(tools) = tools.filter(|t| !t.is_empty()) {
267                prompt.push_str("\n\n");
268                prompt.push_str(&render_tools(tools));
269            }
270            if let Some(rf) = response_format {
271                prompt.push_str("\n\n");
272                prompt.push_str(&format!(
273                    "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{}",
274                    to_json(rf)
275                ));
276            }
277        }
278
279        "developer" => {
280            if content.is_empty() {
281                return Err(DsEncodingError::InvalidMessage {
282                    role: role.to_string(),
283                    msg: msg.to_string(),
284                });
285            }
286            let mut content_developer = String::new();
287            content_developer.push_str(USER_SP_TOKEN);
288            content_developer.push_str(content);
289            if let Some(tools) = tools.filter(|t| !t.is_empty()) {
290                content_developer.push_str("\n\n");
291                content_developer.push_str(&render_tools(tools));
292            }
293            if let Some(rf) = response_format {
294                content_developer.push_str("\n\n");
295                let _ = write!(
296                    content_developer,
297                    "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{}",
298                    to_json(rf)
299                );
300            }
301            prompt.push_str(&content_developer);
302        }
303
304        "user" => {
305            prompt.push_str(USER_SP_TOKEN);
306            // Handle content blocks (tool results mixed with text)
307            if let Some(content_blocks) = msg.get("content_blocks").and_then(|v| v.as_array()) {
308                let mut parts: Vec<String> = Vec::with_capacity(content_blocks.len());
309                for block in content_blocks {
310                    let block_type = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
311                    match block_type {
312                        "text" => {
313                            let text = block.get("text").and_then(|v| v.as_str()).unwrap_or("");
314                            parts.push(text.to_string());
315                        }
316                        "tool_result" => {
317                            let tc = block.get("content");
318                            let tool_content = match tc {
319                                Some(Value::Array(items)) => {
320                                    let mut text_parts: Vec<String> =
321                                        Vec::with_capacity(items.len());
322                                    for b in items {
323                                        let bt =
324                                            b.get("type").and_then(|v| v.as_str()).unwrap_or("");
325                                        if bt == "text" {
326                                            text_parts.push(
327                                                b.get("text")
328                                                    .and_then(|v| v.as_str())
329                                                    .unwrap_or("")
330                                                    .to_string(),
331                                            );
332                                        } else {
333                                            text_parts.push(format!("[Unsupported {bt}]"));
334                                        }
335                                    }
336                                    text_parts.join("\n\n")
337                                }
338                                Some(Value::String(s)) => s.clone(),
339                                Some(other) => to_json(other),
340                                None => String::new(),
341                            };
342                            parts.push(format!("<tool_result>{tool_content}</tool_result>"));
343                        }
344                        other => parts.push(format!("[Unsupported {other}]")),
345                    }
346                }
347                prompt.push_str(&parts.join("\n\n"));
348            } else {
349                prompt.push_str(content);
350            }
351        }
352
353        "latest_reminder" => {
354            prompt.push_str(LATEST_REMINDER_SP_TOKEN);
355            prompt.push_str(content);
356        }
357        "tool" => {
358            return Err(DsEncodingError::UnmergedToolRole(index));
359        }
360
361        "assistant" => {
362            let mut thinking_part = String::new();
363            let mut tc_content = String::new();
364            if let Some(tcs) = tool_calls.filter(|t| !t.is_empty()) {
365                let mut tc_list = Vec::with_capacity(tcs.len());
366                for tc in tcs {
367                    let name = tc.get("name").and_then(|v| v.as_str()).unwrap_or("");
368                    let args = encode_arguments_to_dsml(tc);
369                    tc_list.push(format!(
370                        "<{DSML_TOKEN}invoke name=\"{name}\">\n{args}\n</{DSML_TOKEN}invoke>"
371                    ));
372                }
373                let joined = tc_list.join("\n");
374                let _ = write!(
375                    tc_content,
376                    "\n\n<{DSML_TOKEN}{TOOL_CALLS_BLOCK_NAME}>\n{joined}\n</{DSML_TOKEN}{TOOL_CALLS_BLOCK_NAME}>"
377                );
378            }
379            // prev_has_task: if previous message had a task, this is a task
380            // output (no thinking).
381            let prev_has_task = if index >= 1 {
382                messages[index - 1].get("task").is_some()
383                    && !messages[index - 1]
384                        .get("task")
385                        .map(Value::is_null)
386                        .unwrap_or(true)
387            } else {
388                false
389            };
390            if thinking_mode == ThinkingMode::Thinking && !prev_has_task {
391                let emit = !drop_thinking || after_last_user(index, last_user_idx);
392                if emit {
393                    thinking_part.push_str(reasoning_content);
394                    thinking_part.push_str(THINKING_END_TOKEN);
395                }
396            }
397            prompt.push_str(&thinking_part);
398            prompt.push_str(content);
399            prompt.push_str(&tc_content);
400            if !wo_eos {
401                prompt.push_str(EOS_TOKEN);
402            }
403        }
404        other => return Err(DsEncodingError::UnknownRole(other.to_string())),
405    }
406
407    // Append transition tokens based on what follows.
408    if let Some(next) = messages.get(index + 1) {
409        let next_role = next.get("role").and_then(|v| v.as_str()).unwrap_or("");
410        if !matches!(next_role, "assistant" | "latest_reminder") {
411            return Ok(prompt);
412        }
413    }
414
415    let task = messages[index]
416        .get("task")
417        .and_then(|v| v.as_str())
418        .filter(|s| !s.is_empty());
419    if let Some(task) = task {
420        let sp_token =
421            task_sp_token(task).ok_or_else(|| DsEncodingError::InvalidTask(task.to_string()))?;
422        if task == "action" {
423            // Action task: append Assistant + thinking token + action sp token.
424            prompt.push_str(ASSISTANT_SP_TOKEN);
425            prompt.push_str(if thinking_mode == ThinkingMode::Thinking {
426                THINKING_START_TOKEN
427            } else {
428                THINKING_END_TOKEN
429            });
430            prompt.push_str(sp_token);
431        } else {
432            // Non-action tasks: append task sp token directly after the message.
433            prompt.push_str(sp_token);
434        }
435    } else if matches!(role, "user" | "developer") {
436        // Normal generation: append Assistant + thinking token.
437        prompt.push_str(ASSISTANT_SP_TOKEN);
438        let opens_thinking = thinking_mode == ThinkingMode::Thinking
439            && (!drop_thinking || at_or_after_last_user(index, last_user_idx));
440        if opens_thinking {
441            prompt.push_str(THINKING_START_TOKEN);
442        } else {
443            prompt.push_str(THINKING_END_TOKEN);
444        }
445    }
446    Ok(prompt)
447}
448
449// ---------------------------------------------------------------------------
450// Preprocessing: merge tool messages and sort tool results.
451// ---------------------------------------------------------------------------
452fn merge_tool_messages(messages: &[Value]) -> Vec<Value> {
453    let mut merged: Vec<Value> = Vec::with_capacity(messages.len());
454    for msg in messages {
455        let msg = msg.clone();
456        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
457        if role == "tool" {
458            let tool_block = json!({
459                "type": "tool_result",
460                "tool_use_id": msg.get("tool_call_id").cloned().unwrap_or(Value::String(String::new())),
461                "content": msg.get("content").cloned().unwrap_or(Value::String(String::new())),
462            });
463            // Append to a previous user message that already has content_blocks.
464            let appended = if let Some(prev) = merged.last_mut() {
465                let prev_role = prev.get("role").and_then(|v| v.as_str()).unwrap_or("");
466                if prev_role == "user" && prev.get("content_blocks").is_some() {
467                    if let Some(blocks) = prev
468                        .get_mut("content_blocks")
469                        .and_then(|v| v.as_array_mut())
470                    {
471                        blocks.push(tool_block.clone());
472                        true
473                    } else {
474                        false
475                    }
476                } else {
477                    false
478                }
479            } else {
480                false
481            };
482            if !appended {
483                merged.push(json!({
484                    "role": "user",
485                    "content_blocks": [tool_block],
486                }));
487            }
488        } else if role == "user" {
489            let text_block = json!({
490                "type": "text",
491                "text": msg.get("content").cloned().unwrap_or(Value::String(String::new())),
492            });
493            let merged_into_prev = if let Some(prev) = merged.last_mut() {
494                let prev_role = prev.get("role").and_then(|v| v.as_str()).unwrap_or("");
495                let prev_has_blocks = prev.get("content_blocks").is_some();
496                let prev_task_none = prev.get("task").map(Value::is_null).unwrap_or(true);
497                if prev_role == "user" && prev_has_blocks && prev_task_none {
498                    if let Some(blocks) = prev
499                        .get_mut("content_blocks")
500                        .and_then(|v| v.as_array_mut())
501                    {
502                        blocks.push(text_block.clone());
503                        true
504                    } else {
505                        false
506                    }
507                } else {
508                    false
509                }
510            } else {
511                false
512            };
513            if !merged_into_prev {
514                let mut new_msg = json!({
515                    "role": "user",
516                    "content": msg.get("content").cloned().unwrap_or(Value::String(String::new())),
517                    "content_blocks": [text_block],
518                });
519                // Preserve extra fields (task, wo_eos, mask, etc.).
520                if let Some(obj) = new_msg.as_object_mut() {
521                    for key in ["task", "wo_eos", "mask"] {
522                        if let Some(v) = msg.get(key) {
523                            obj.insert(key.to_string(), v.clone());
524                        }
525                    }
526                }
527                merged.push(new_msg);
528            }
529        } else {
530            merged.push(msg);
531        }
532    }
533    merged
534}
535
536/// Sort `tool_result` blocks within user messages by the tool-call order
537/// of the *preceding* assistant turn.
538fn sort_tool_results_by_call_order(messages: Vec<Value>) -> Vec<Value> {
539    let mut out = messages;
540    let mut last_tool_call_order: std::collections::HashMap<String, usize> =
541        std::collections::HashMap::new();
542    for msg in &mut out {
543        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
544        if role == "assistant" {
545            if let Some(tcs) = msg.get("tool_calls").and_then(|v| v.as_array()) {
546                last_tool_call_order.clear();
547                for (idx, tc) in tcs.iter().enumerate() {
548                    let tc_id = tc
549                        .get("id")
550                        .and_then(|v| v.as_str())
551                        .map(str::to_string)
552                        .or_else(|| {
553                            tc.get("function")
554                                .and_then(|f| f.get("id"))
555                                .and_then(|v| v.as_str())
556                                .map(str::to_string)
557                        });
558                    if let Some(id) = tc_id {
559                        last_tool_call_order.insert(id, idx);
560                    }
561                }
562            }
563        } else if role == "user" {
564            if let Some(blocks) = msg.get("content_blocks").and_then(|v| v.as_array()) {
565                let tool_blocks: Vec<&Value> = blocks
566                    .iter()
567                    .filter(|b| b.get("type").and_then(|v| v.as_str()) == Some("tool_result"))
568                    .collect();
569                if tool_blocks.len() > 1 && !last_tool_call_order.is_empty() {
570                    let mut sorted: Vec<Value> = tool_blocks.iter().map(|b| (*b).clone()).collect();
571                    sorted.sort_by_key(|b| {
572                        b.get("tool_use_id")
573                            .and_then(|v| v.as_str())
574                            .and_then(|id| last_tool_call_order.get(id).copied())
575                            .unwrap_or(0)
576                    });
577                    let mut sorted_idx = 0;
578                    let mut new_blocks: Vec<Value> = Vec::with_capacity(blocks.len());
579                    for block in blocks {
580                        if block.get("type").and_then(|v| v.as_str()) == Some("tool_result") {
581                            new_blocks.push(sorted[sorted_idx].clone());
582                            sorted_idx += 1;
583                        } else {
584                            new_blocks.push(block.clone());
585                        }
586                    }
587                    if let Some(obj) = msg.as_object_mut() {
588                        obj.insert("content_blocks".to_string(), Value::Array(new_blocks));
589                    }
590                }
591            }
592        }
593    }
594    out
595}
596
597/// Drop reasoning_content from earlier assistant turns and remove non-essential
598/// developer messages before the last user.
599fn drop_thinking_messages(messages: &[Value]) -> Vec<Value> {
600    let last_user_idx = find_last_user_index(messages);
601    let mut out: Vec<Value> = Vec::with_capacity(messages.len());
602    for (idx, msg) in messages.iter().enumerate() {
603        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
604        let always_keep = matches!(
605            role,
606            "user" | "system" | "tool" | "latest_reminder" | "direct_search_results"
607        ) || at_or_after_last_user(idx, last_user_idx);
608        if always_keep {
609            out.push(msg.clone());
610            continue;
611        }
612        if role == "assistant" {
613            let mut cloned = msg.clone();
614            if let Some(obj) = cloned.as_object_mut() {
615                obj.remove("reasoning_content");
616            }
617            out.push(cloned);
618        }
619        // developer + other roles before last_user_idx are dropped.
620    }
621    out
622}
623
624// ---------------------------------------------------------------------------
625// encode_messages — public entry point
626// ---------------------------------------------------------------------------
627/// Encode a list of OpenAI-style messages into a DeepSeek V4 prompt string.
628///
629/// The signature mirrors the Python `encode_messages` function;
630/// `context` is omitted because SMG always renders from scratch.
631#[expect(
632    clippy::trivially_copy_pass_by_ref,
633    reason = "public API mirrors the documented Rust signature with a borrow"
634)]
635pub fn encode_messages(
636    messages: &[Value],
637    thinking_mode: ThinkingMode,
638    params: &EncodeParams,
639) -> Result<String, DsEncodingError> {
640    // Preprocess: merge tool messages and sort tool results.
641    let merged = merge_tool_messages(messages);
642    let mut full_messages = sort_tool_results_by_call_order(merged);
643    let mut prompt = if params.add_default_bos_token {
644        BOS_TOKEN.to_string()
645    } else {
646        String::new()
647    };
648    // Resolve drop_thinking: if any message has tools defined, never drop.
649    let mut effective_drop_thinking = params.drop_thinking;
650    if full_messages
651        .iter()
652        .any(|m| m.get("tools").is_some_and(|v| !v.is_null()))
653    {
654        effective_drop_thinking = false;
655    }
656    if thinking_mode == ThinkingMode::Thinking && effective_drop_thinking {
657        full_messages = drop_thinking_messages(&full_messages);
658    }
659    for idx in 0..full_messages.len() {
660        prompt.push_str(&render_message(
661            idx,
662            &full_messages,
663            thinking_mode,
664            effective_drop_thinking,
665            params.reasoning_effort,
666        )?);
667    }
668    Ok(prompt)
669}
670
671// ---------------------------------------------------------------------------
672// Tests
673// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Build a plain user message with the given text.
    fn user(text: &str) -> Value {
        json!({ "role": "user", "content": text })
    }

    #[test]
    fn one_turn_user_chat_mode() {
        let msgs = [user("Hello")];
        let out = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap();
        let expected =
            format!("{BOS_TOKEN}{USER_SP_TOKEN}Hello{ASSISTANT_SP_TOKEN}{THINKING_END_TOKEN}");
        assert_eq!(out, expected);
    }

    #[test]
    fn one_turn_user_thinking_mode() {
        let msgs = [user("Hello")];
        let out = encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();
        let expected =
            format!("{BOS_TOKEN}{USER_SP_TOKEN}Hello{ASSISTANT_SP_TOKEN}{THINKING_START_TOKEN}");
        assert_eq!(out, expected);
    }

    #[test]
    fn reasoning_effort_max_prepends_prefix() {
        let msgs = [user("Hello")];
        let params = EncodeParams {
            reasoning_effort: Some(ReasoningEffort::Max),
            ..EncodeParams::default()
        };
        let out = encode_messages(&msgs, ThinkingMode::Thinking, &params).unwrap();
        // The prefix appears immediately after BOS, before the user message.
        let expected_start = format!("{BOS_TOKEN}{REASONING_EFFORT_MAX}");
        // The preview is cut by characters, not bytes: the prompt contains
        // multi-byte tokens, and a byte-index slice could land inside one and
        // panic while building the failure message.
        assert!(
            out.starts_with(&expected_start),
            "expected prompt to start with BOS+REASONING_EFFORT_MAX, got: {:?}",
            out.chars().take(120).collect::<String>()
        );
        // In chat mode the prefix is absent even when max effort is requested.
        let out_chat = encode_messages(&msgs, ThinkingMode::Chat, &params).unwrap();
        assert!(!out_chat.contains("Reasoning Effort"));
    }

    #[test]
    fn quick_instruction_action_token() {
        // A user message tagged with `task: "action"` triggers the action
        // quick-instruction sequence: ASSISTANT_SP + thinking-end + ACTION token.
        let msgs = [json!({
            "role": "user",
            "content": "Take some action",
            "task": "action",
        })];
        let out = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap();
        let expected = format!(
            "{BOS_TOKEN}{USER_SP_TOKEN}Take some action{ASSISTANT_SP_TOKEN}{THINKING_END_TOKEN}{TASK_ACTION}"
        );
        assert_eq!(out, expected);
        // Same in thinking mode but uses thinking-start.
        let out_t =
            encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();
        assert!(out_t.contains(&format!(
            "{ASSISTANT_SP_TOKEN}{THINKING_START_TOKEN}{TASK_ACTION}"
        )));
    }

    #[test]
    fn quick_instruction_query_token() {
        // Non-action quick-instruction tasks just append the task token.
        let msgs = [json!({
            "role": "user",
            "content": "What is X?",
            "task": "query",
        })];
        let out = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap();
        let expected = format!("{BOS_TOKEN}{USER_SP_TOKEN}What is X?{TASK_QUERY}");
        assert_eq!(out, expected);
    }

    #[test]
    fn assistant_tool_call_renders_dsml() {
        let msgs = [
            user("call my tool"),
            json!({
                "role": "assistant",
                "reasoning_content": "thinking about tool",
                "content": "",
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "search",
                            "arguments": "{\"query\": \"deepseek\", \"limit\": 5}"
                        }
                    }
                ]
            }),
        ];
        let out = encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();
        // V4 wraps in `<｜DSML｜tool_calls>` (not `function_calls` like V3.2).
        assert!(out.contains(&format!("<{DSML_TOKEN}{TOOL_CALLS_BLOCK_NAME}>")));
        assert!(out.contains(&format!("<{DSML_TOKEN}invoke name=\"search\">")));
        assert!(out.contains(&format!(
            "<{DSML_TOKEN}parameter name=\"query\" string=\"true\">deepseek</{DSML_TOKEN}parameter>"
        )));
        assert!(out.contains(&format!(
            "<{DSML_TOKEN}parameter name=\"limit\" string=\"false\">5</{DSML_TOKEN}parameter>"
        )));
        assert!(out.contains(&format!("</{DSML_TOKEN}{TOOL_CALLS_BLOCK_NAME}>")));
        assert!(out.ends_with(EOS_TOKEN));
    }

    #[test]
    fn unknown_role_errors() {
        let msgs = [json!({ "role": "moderator", "content": "hi" })];
        let err = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap_err();
        assert!(matches!(err, DsEncodingError::UnknownRole(ref r) if r == "moderator"));
    }
}
llm_tokenizer/encoders/deepseek_v4.rs

llm_tokenizer/encoders/
deepseek_v4.rs