llm_tokenizer/encoders/
deepseek_v32.rs

1// Ported from https://huggingface.co/deepseek-ai/DeepSeek-V3.2/blob/main/encoding/encoding_dsv32.py
2
3use std::fmt::Write as _;
4
5use serde_json::{json, Value};
6use thiserror::Error;
7
/// Selects how thinking/reasoning markers are rendered into the prompt.
///
/// Stands in for the Python encoder's string-valued `thinking_mode`
/// argument, whose only accepted values are `"chat"` and `"thinking"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ThinkingMode {
    /// Counterpart of the Python `"chat"` value.
    Chat,
    /// Counterpart of the Python `"thinking"` value.
    Thinking,
}

impl ThinkingMode {
    /// Returns `true` only for [`ThinkingMode::Thinking`].
    fn is_thinking(self) -> bool {
        self == Self::Thinking
    }
}
23
/// Options accepted by [`encode_messages`].
///
/// There is deliberately no `context` field: SMG always renders a
/// conversation from scratch, which corresponds to the Python default of
/// `context=None`.
#[derive(Debug, Clone, Copy)]
pub struct EncodeParams {
    /// When `true`, the rendered prompt starts with the BOS token.
    pub add_default_bos_token: bool,
    /// When `true` and thinking mode is active, the message list is passed
    /// through the thinking-dropping filter before rendering.
    pub drop_thinking: bool,
}

impl Default for EncodeParams {
    /// Both options are switched on by default.
    fn default() -> Self {
        let enabled = true;
        EncodeParams {
            add_default_bos_token: enabled,
            drop_thinking: enabled,
        }
    }
}
42
/// Errors raised when a message list is malformed.
///
/// Mirrors the Python `DS32EncodingError`. Each variant's `#[error]` string
/// is the text shown by its `Display` implementation.
#[derive(Debug, Error)]
pub enum DsEncodingError {
    /// `render_message` was asked for an `index` that is not inside the
    /// message slice.
    #[error("Index {index} out of range for messages list of length {len}")]
    IndexOutOfRange { index: usize, len: usize },

    /// A message is not acceptable for its role; `msg` holds the offending
    /// message serialized as JSON. Currently produced for `developer`
    /// messages whose content is empty.
    #[error("Invalid message for role `{role}`: {msg}")]
    InvalidMessage { role: String, msg: String },

    /// A `tool` message at `index` is not preceded (ignoring other `tool`
    /// messages) by an `assistant` message; `context` holds the message that
    /// was found instead, serialized as JSON.
    #[error("Invalid messages at {index}: {context}")]
    InvalidToolMessages { index: usize, context: String },

    /// A `tool` message has no matching tool call on the preceding assistant
    /// message (none at all, or fewer calls than tool outputs).
    #[error("No tool calls but found tool output")]
    NoToolCalls,

    /// The message's `role` is not one of the roles `render_message` handles.
    #[error("Unknown role: {0}")]
    UnknownRole(String),

    /// In thinking mode, an assistant message after the last user message has
    /// neither reasoning content nor tool calls; the payload is the message
    /// serialized as JSON.
    #[error("thinking mode: invalid message without reasoning_content/tool_calls after last user message: {0}")]
    MissingReasoningOrToolCalls(String),

    /// The `arguments` string of a tool call could not be parsed as JSON.
    #[error("Failed to parse tool-call arguments as JSON: {0}")]
    InvalidToolArgumentsJson(#[source] serde_json::Error),
}
69
70// ---------------------------------------------------------------------------
71// Special-token constants — copied verbatim from the Python source.
72// ---------------------------------------------------------------------------
73
/// Begin-of-sentence marker; `encode_messages` places it at the start of the
/// prompt when `EncodeParams::add_default_bos_token` is set.
pub const BOS_TOKEN: &str = "<｜begin▁of▁sentence｜>";
/// End-of-sentence marker appended after every rendered assistant message.
pub const EOS_TOKEN: &str = "<｜end▁of▁sentence｜>";
/// Marker that opens a thinking block.
pub const THINKING_START_TOKEN: &str = "<think>";
/// Marker that closes a thinking block.
pub const THINKING_END_TOKEN: &str = "</think>";
/// Marker embedded in the DSML tag names (`function_calls`, `invoke`,
/// `parameter`).
pub const DSML_TOKEN: &str = "｜DSML｜";

/// Written immediately before the content of a user turn (see `user_msg`).
const USER_PREFIX: &str = "<｜User｜>";
/// Written immediately after the content of a user turn (see `user_msg`).
const ASSISTANT_SUFFIX: &str = "<｜Assistant｜>";
82
83// ---------------------------------------------------------------------------
84// Templates
85// ---------------------------------------------------------------------------
86
87/// Mirrors `TOOLS_SYSTEM_TEMPLATE` from the Python source, including the
88/// blank lines between paragraphs that the upstream template preserves.
89fn render_tools_template(tool_schemas: &str) -> String {
90    let dsml = DSML_TOKEN;
91    let tstart = THINKING_START_TOKEN;
92    let tend = THINKING_END_TOKEN;
93    format!(
94"## Tools
95
96You have access to a set of tools you can use to answer the user's question.
97You can invoke functions by writing a \"<{dsml}function_calls>\" block like the following as part of your reply to the user:
98<{dsml}function_calls>
99<{dsml}invoke name=\"$FUNCTION_NAME\">
100<{dsml}parameter name=\"$PARAMETER_NAME\" string=\"true|false\">$PARAMETER_VALUE</{dsml}parameter>
101...
102</{dsml}invoke>
103<{dsml}invoke name=\"$FUNCTION_NAME2\">
104...
105</{dsml}invoke>
106</{dsml}function_calls>
107
108String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The \"string\" attribute should be set to \"true\" for string type parameters and \"false\" for other types (numbers, booleans, arrays, objects).
109
110If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
111
112<{dsml}function_calls>
113...
114</{dsml}function_calls>
115
116<function_results>
117...
118</function_results>
119
120{tstart}...thinking about results{tend}
121
122Here are the functions available in JSONSchema format:
123<functions>
124{tool_schemas}
125</functions>
126"
127    )
128}
129
/// Builds the "## Response Format" section: a fixed instruction header
/// followed by `schema`, which is appended verbatim.
fn response_format_block(schema: &str) -> String {
    const HEADER: &str =
        "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n";

    let mut block = String::with_capacity(HEADER.len() + schema.len());
    block.push_str(HEADER);
    block.push_str(schema);
    block
}
135
136fn user_msg(content: &str) -> String {
137    format!("{USER_PREFIX}{content}{ASSISTANT_SUFFIX}")
138}
139
140// ---------------------------------------------------------------------------
141// JSON helpers
142// ---------------------------------------------------------------------------
143
144/// Mirrors the Python `to_json` helper. serde_json always emits valid UTF-8
145/// without escaping, so the `ensure_ascii` fallback in the Python version is
146/// effectively a no-op here.
147fn to_json(value: &Value) -> String {
148    serde_json::to_string(value).unwrap_or_else(|_| "null".to_string())
149}
150
151/// `[tool["function"] for tool in tools]`
152fn tools_from_openai_format(tools: &[Value]) -> Vec<Value> {
153    tools
154        .iter()
155        .filter_map(|t| t.get("function").cloned())
156        .collect()
157}
158
159/// `[{ "name": tc["function"]["name"], "arguments": tc["function"]["arguments"] } for tc in tool_calls]`
160fn tool_calls_from_openai_format(tool_calls: &[Value]) -> Vec<Value> {
161    tool_calls
162        .iter()
163        .filter_map(|tc| {
164            let f = tc.get("function")?;
165            Some(json!({
166                "name": f.get("name").cloned().unwrap_or(Value::Null),
167                "arguments": f.get("arguments").cloned().unwrap_or(Value::Null),
168            }))
169        })
170        .collect()
171}
172
173/// Mirrors `encode_arguments_to_dsml`. `tool_call["arguments"]` is a JSON
174/// *string* in OpenAI schema; the Python code does `json.loads(...)` and
175/// iterates over the resulting dict.
176fn encode_arguments_to_dsml(tool_call: &Value) -> Result<String, DsEncodingError> {
177    let arguments_str = tool_call
178        .get("arguments")
179        .and_then(|v| v.as_str())
180        .unwrap_or("{}");
181
182    let arguments: Value =
183        serde_json::from_str(arguments_str).map_err(DsEncodingError::InvalidToolArgumentsJson)?;
184
185    let obj = match arguments.as_object() {
186        Some(obj) => obj,
187        // Non-object payload — render nothing, matching Python behaviour
188        // (`for k, v in arguments.items()` would raise; we tolerate it).
189        None => return Ok(String::new()),
190    };
191
192    let mut parts = Vec::with_capacity(obj.len());
193    for (k, v) in obj {
194        let (is_str, value_str) = match v {
195            Value::String(s) => ("true", s.clone()),
196            other => ("false", to_json(other)),
197        };
198        parts.push(format!(
199            "<{DSML_TOKEN}parameter name=\"{k}\" string=\"{is_str}\">{value_str}</{DSML_TOKEN}parameter>",
200        ));
201    }
202    Ok(parts.join("\n"))
203}
204
205fn render_tools(tools: &[Value]) -> String {
206    let schemas: Vec<String> = tools.iter().map(to_json).collect();
207    render_tools_template(&schemas.join("\n"))
208}
209
210/// Mirrors `find_last_user_index`: returns `None` if no user/developer
211/// message exists (Python returns -1).
212fn find_last_user_index(messages: &[Value]) -> Option<usize> {
213    for idx in (0..messages.len()).rev() {
214        let role = messages[idx].get("role").and_then(|v| v.as_str());
215        if matches!(role, Some("user") | Some("developer")) {
216            return Some(idx);
217        }
218    }
219    None
220}
221
/// Rust rendition of the Python comparison `index >= last_user_idx`.
///
/// `None` stands for Python's `-1` ("no user message"), which every
/// non-negative index is at or after, so the answer is then always `true`.
fn at_or_after_last_user(index: usize, last_user_idx: Option<usize>) -> bool {
    let Some(last) = last_user_idx else {
        return true;
    };
    last <= index
}
230
/// Rust rendition of the Python comparison `index > last_user_idx`.
///
/// `None` (no user message) makes the answer `true` for every index.
fn after_last_user(index: usize, last_user_idx: Option<usize>) -> bool {
    let Some(last) = last_user_idx else {
        return true;
    };
    last < index
}
238
/// Tells whether `index` is exactly the last user position.
///
/// Always `false` when there is no user message (`None`).
fn equals_last_user(index: usize, last_user_idx: Option<usize>) -> bool {
    matches!(last_user_idx, Some(last) if last == index)
}
243
244// ---------------------------------------------------------------------------
245// render_message — direct port of the Python function with the same name.
246// ---------------------------------------------------------------------------
247
248#[expect(
249    clippy::too_many_lines,
250    reason = "mirrors the Python render_message function 1:1 for sync-ability"
251)]
252fn render_message(
253    index: usize,
254    messages: &[Value],
255    thinking_mode: ThinkingMode,
256) -> Result<String, DsEncodingError> {
257    if index >= messages.len() {
258        return Err(DsEncodingError::IndexOutOfRange {
259            index,
260            len: messages.len(),
261        });
262    }
263
264    let mut prompt = String::new();
265    let msg = &messages[index];
266    let last_user_idx = find_last_user_index(messages);
267
268    let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
269    let content = msg.get("content").and_then(|v| v.as_str()).unwrap_or("");
270    let tools_raw = msg.get("tools").and_then(|v| v.as_array());
271    let response_format = msg.get("response_format");
272    let tool_calls_raw = msg.get("tool_calls").and_then(|v| v.as_array());
273    let reasoning_content = msg
274        .get("reasoning_content")
275        .and_then(|v| v.as_str())
276        .unwrap_or("");
277
278    let tools_owned = tools_raw.map(|t| tools_from_openai_format(t));
279    let tools = tools_owned.as_deref();
280
281    let tool_calls_owned = tool_calls_raw.map(|tc| tool_calls_from_openai_format(tc));
282    let tool_calls = tool_calls_owned.as_deref();
283
284    match role {
285        "system" => {
286            // system_msg_template is "{content}"
287            prompt.push_str(content);
288            if let Some(tools) = tools.filter(|t| !t.is_empty()) {
289                prompt.push_str("\n\n");
290                prompt.push_str(&render_tools(tools));
291            }
292            if let Some(rf) = response_format {
293                prompt.push_str("\n\n");
294                prompt.push_str(&response_format_block(&to_json(rf)));
295            }
296        }
297        "developer" => {
298            if content.is_empty() {
299                return Err(DsEncodingError::InvalidMessage {
300                    role: role.to_string(),
301                    msg: msg.to_string(),
302                });
303            }
304            let mut content_developer = String::new();
305            if let Some(tools) = tools.filter(|t| !t.is_empty()) {
306                content_developer.push_str("\n\n");
307                content_developer.push_str(&render_tools(tools));
308            }
309            if let Some(rf) = response_format {
310                content_developer.push_str("\n\n");
311                content_developer.push_str(&response_format_block(&to_json(rf)));
312            }
313            let _ = write!(content_developer, "\n\n# The user's message is: {content}");
314
315            prompt.push_str(&user_msg(&content_developer));
316
317            if equals_last_user(index, last_user_idx) && thinking_mode.is_thinking() {
318                prompt.push_str(THINKING_START_TOKEN);
319            } else {
320                prompt.push_str(THINKING_END_TOKEN);
321            }
322        }
323        "user" => {
324            prompt.push_str(&user_msg(content));
325            if equals_last_user(index, last_user_idx) && thinking_mode.is_thinking() {
326                prompt.push_str(THINKING_START_TOKEN);
327            } else {
328                prompt.push_str(THINKING_END_TOKEN);
329            }
330        }
331        "tool" => {
332            // Walk back over consecutive tool messages to find the originating
333            // assistant turn — same logic as Python.
334            let mut prev_assistant_idx: isize = index as isize - 1;
335            while prev_assistant_idx >= 0
336                && messages[prev_assistant_idx as usize]
337                    .get("role")
338                    .and_then(|v| v.as_str())
339                    == Some("tool")
340            {
341                prev_assistant_idx -= 1;
342            }
343
344            let assistant_role = if prev_assistant_idx >= 0 {
345                messages[prev_assistant_idx as usize]
346                    .get("role")
347                    .and_then(|v| v.as_str())
348            } else {
349                None
350            };
351
352            let valid_anchor =
353                index == 0 || (prev_assistant_idx >= 0 && assistant_role == Some("assistant"));
354            if !valid_anchor {
355                let anchor_idx = prev_assistant_idx.max(0) as usize;
356                return Err(DsEncodingError::InvalidToolMessages {
357                    index,
358                    context: messages[anchor_idx].to_string(),
359                });
360            }
361
362            let assistant_tool_calls = if prev_assistant_idx >= 0 {
363                messages[prev_assistant_idx as usize]
364                    .get("tool_calls")
365                    .and_then(|v| v.as_array())
366                    .map(|a| a.len())
367                    .unwrap_or(0)
368            } else {
369                0
370            };
371
372            let tool_call_order = (index as isize - prev_assistant_idx) as usize;
373            if assistant_tool_calls == 0 || assistant_tool_calls < tool_call_order {
374                return Err(DsEncodingError::NoToolCalls);
375            }
376
377            if tool_call_order == 1 {
378                prompt.push_str("\n\n<function_results>");
379            }
380
381            // tool_output_template = "\n<result>{content}</result>"
382            let _ = write!(prompt, "\n<result>{content}</result>");
383
384            if tool_call_order == assistant_tool_calls {
385                prompt.push_str("\n</function_results>");
386
387                if at_or_after_last_user(index, last_user_idx) && thinking_mode.is_thinking() {
388                    prompt.push_str("\n\n");
389                    prompt.push_str(THINKING_START_TOKEN);
390                } else {
391                    prompt.push_str("\n\n");
392                    prompt.push_str(THINKING_END_TOKEN);
393                }
394            }
395        }
396        "assistant" => {
397            let mut thinking_part = String::new();
398
399            let mut tool_calls_content = String::new();
400            if let Some(tcs) = tool_calls.filter(|t| !t.is_empty()) {
401                let mut rendered = Vec::with_capacity(tcs.len());
402                for tc in tcs {
403                    let name = tc.get("name").and_then(|v| v.as_str()).unwrap_or("");
404                    let args = encode_arguments_to_dsml(tc)?;
405                    rendered.push(format!(
406                        "<{DSML_TOKEN}invoke name=\"{name}\">\n{args}\n</{DSML_TOKEN}invoke>",
407                    ));
408                }
409                let joined = rendered.join("\n");
410                let _ = write!(
411                    tool_calls_content,
412                    "\n\n<{DSML_TOKEN}function_calls>\n{joined}\n</{DSML_TOKEN}function_calls>"
413                );
414            }
415
416            let summary_content = content;
417
418            if thinking_mode.is_thinking() && after_last_user(index, last_user_idx) {
419                let has_reasoning = !reasoning_content.is_empty();
420                let has_tool_calls = tool_calls.is_some_and(|t| !t.is_empty());
421                if !has_reasoning && !has_tool_calls {
422                    return Err(DsEncodingError::MissingReasoningOrToolCalls(
423                        msg.to_string(),
424                    ));
425                }
426                thinking_part.push_str(reasoning_content);
427                thinking_part.push_str(THINKING_END_TOKEN);
428            }
429
430            // assistant_msg_template = "{reasoning}{content}{tool_calls}<｜end▁of▁sentence｜>"
431            prompt.push_str(&thinking_part);
432            prompt.push_str(summary_content);
433            prompt.push_str(&tool_calls_content);
434            prompt.push_str(EOS_TOKEN);
435        }
436        other => return Err(DsEncodingError::UnknownRole(other.to_string())),
437    }
438
439    Ok(prompt)
440}
441
442// ---------------------------------------------------------------------------
443// drop_thinking_messages
444// ---------------------------------------------------------------------------
445
446fn drop_thinking_messages(messages: &[Value]) -> Vec<Value> {
447    let last_user_idx = find_last_user_index(messages);
448    let mut out: Vec<Value> = Vec::with_capacity(messages.len());
449
450    for (idx, msg) in messages.iter().enumerate() {
451        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("");
452        let always_keep =
453            matches!(role, "user" | "system" | "tool") || at_or_after_last_user(idx, last_user_idx);
454
455        if always_keep {
456            out.push(msg.clone());
457            continue;
458        }
459
460        if role == "assistant" {
461            let mut cloned = msg.clone();
462            if let Some(obj) = cloned.as_object_mut() {
463                obj.remove("reasoning_content");
464            }
465            out.push(cloned);
466        }
467        // Other roles before last_user_idx are dropped, matching Python.
468    }
469
470    out
471}
472
473// ---------------------------------------------------------------------------
474// encode_messages — public entry point
475// ---------------------------------------------------------------------------
476
477/// Encode a list of OpenAI-style messages into a DeepSeek V3.2 prompt string.
478///
479/// The signature mirrors the Python `encode_messages` function;
480/// `context` is omitted because SMG always renders from scratch.
481#[expect(
482    clippy::trivially_copy_pass_by_ref,
483    reason = "public API mirrors the documented Rust signature with a borrow"
484)]
485pub fn encode_messages(
486    messages: &[Value],
487    thinking_mode: ThinkingMode,
488    params: &EncodeParams,
489) -> Result<String, DsEncodingError> {
490    let mut full_messages: Vec<Value> = messages.to_vec();
491
492    let mut prompt = if params.add_default_bos_token {
493        BOS_TOKEN.to_string()
494    } else {
495        String::new()
496    };
497
498    if thinking_mode.is_thinking() && params.drop_thinking {
499        full_messages = drop_thinking_messages(&full_messages);
500    }
501
502    for idx in 0..full_messages.len() {
503        prompt.push_str(&render_message(idx, &full_messages, thinking_mode)?);
504    }
505
506    Ok(prompt)
507}
508
509// ---------------------------------------------------------------------------
510// Tests
511// ---------------------------------------------------------------------------
512
/// Unit tests that drive the encoder through `encode_messages`.
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Builds a `user` message with the given text as content.
    fn user(text: &str) -> Value {
        json!({ "role": "user", "content": text })
    }

    /// Builds an `assistant` message carrying both reasoning and content.
    fn assistant_with_reasoning(reasoning: &str, content: &str) -> Value {
        json!({
            "role": "assistant",
            "reasoning_content": reasoning,
            "content": content,
        })
    }

    /// In chat mode the single user turn is followed by the closing think
    /// marker.
    #[test]
    fn one_turn_user_chat_mode_closes_think() {
        let msgs = [user("Hello")];
        let out = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap();

        let expected =
            format!("{BOS_TOKEN}{USER_PREFIX}Hello{ASSISTANT_SUFFIX}{THINKING_END_TOKEN}",);
        assert_eq!(out, expected);
    }

    /// In thinking mode the last user turn is followed by the opening think
    /// marker.
    #[test]
    fn one_turn_user_thinking_mode_opens_think() {
        let msgs = [user("Hello")];
        let out = encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();

        let expected =
            format!("{BOS_TOKEN}{USER_PREFIX}Hello{ASSISTANT_SUFFIX}{THINKING_START_TOKEN}",);
        assert_eq!(out, expected);
    }

    #[test]
    fn drop_thinking_strips_earlier_reasoning() {
        // Three-turn conversation: the assistant turn at index 1 carries
        // reasoning_content. The Python encoder only emits reasoning on
        // assistant messages strictly *after* the last user turn, so an
        // earlier assistant turn never leaks reasoning regardless of the
        // drop_thinking flag. The flag controls whether the field is
        // *retained on the message dict* — verify the rendered prompt is
        // unchanged in either direction.
        let msgs = [
            user("Q1"),
            assistant_with_reasoning("private thought", "A1"),
            user("Q2"),
        ];

        let out_drop =
            encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();
        assert!(!out_drop.contains("private thought"));

        let params = EncodeParams {
            drop_thinking: false,
            ..EncodeParams::default()
        };
        let out_keep = encode_messages(&msgs, ThinkingMode::Thinking, &params).unwrap();
        assert!(!out_keep.contains("private thought"));

        // Sanity: the most-recent assistant turn (after the last user) DOES
        // emit reasoning_content, proving the dropper acted only on the
        // earlier turn.
        let msgs2 = [user("Q1"), assistant_with_reasoning("recent thought", "A1")];
        let out_recent =
            encode_messages(&msgs2, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();
        assert!(out_recent.contains("recent thought"));
    }

    /// An assistant tool call is rendered as a DSML `function_calls` block
    /// with one `parameter` tag per argument, and the turn ends with EOS.
    #[test]
    fn assistant_tool_call_renders_dsml() {
        let msgs = [
            user("call my tool"),
            json!({
                "role": "assistant",
                "reasoning_content": "thinking about tool",
                "content": "",
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "search",
                            "arguments": "{\"query\": \"deepseek\", \"limit\": 5}"
                        }
                    }
                ]
            }),
        ];

        let out = encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default()).unwrap();

        assert!(out.contains(&format!("<{DSML_TOKEN}function_calls>")));
        assert!(out.contains(&format!("<{DSML_TOKEN}invoke name=\"search\">")));
        // String arguments are written verbatim and flagged string="true".
        assert!(out.contains(&format!(
            "<{DSML_TOKEN}parameter name=\"query\" string=\"true\">deepseek</{DSML_TOKEN}parameter>"
        )));
        // Non-string arguments are written as JSON and flagged string="false".
        assert!(out.contains(&format!(
            "<{DSML_TOKEN}parameter name=\"limit\" string=\"false\">5</{DSML_TOKEN}parameter>"
        )));
        assert!(out.contains(&format!("</{DSML_TOKEN}function_calls>")));
        assert!(out.ends_with(EOS_TOKEN));
    }

    /// A role the encoder does not know is reported, carrying the role name.
    #[test]
    fn unknown_role_errors() {
        let msgs = [json!({ "role": "moderator", "content": "hi" })];
        let err = encode_messages(&msgs, ThinkingMode::Chat, &EncodeParams::default()).unwrap_err();
        assert!(matches!(err, DsEncodingError::UnknownRole(ref r) if r == "moderator"));
    }

    /// With `add_default_bos_token` off, the prompt starts directly with the
    /// user prefix.
    #[test]
    fn skip_bos_when_disabled() {
        let msgs = [user("Hi")];
        let params = EncodeParams {
            add_default_bos_token: false,
            ..EncodeParams::default()
        };
        let out = encode_messages(&msgs, ThinkingMode::Chat, &params).unwrap();
        assert!(!out.starts_with(BOS_TOKEN));
        assert!(out.starts_with(USER_PREFIX));
    }

    #[test]
    fn drop_thinking_does_not_overrun_when_filtering_shrinks_messages() {
        // `drop_thinking_messages` removes developer-role messages before the
        // last user turn, so `full_messages.len() < messages.len()`. The
        // outer loop must iterate the filtered length, not the original, or
        // it walks off the end and returns IndexOutOfRange.
        let msgs = [
            json!({ "role": "developer", "content": "earlier developer note" }),
            json!({ "role": "user", "content": "now" }),
        ];
        let out = encode_messages(&msgs, ThinkingMode::Thinking, &EncodeParams::default())
            .expect("filtered message length must not blow up the loop");
        assert!(
            out.contains("now"),
            "user message missing from prompt: {out}"
        );
    }
}
llm_tokenizer/encoders/deepseek_v32.rs

llm_tokenizer/encoders/
deepseek_v32.rs