dynamo_parsers/tool_calling/json/
deepseek_parser.rs1use regex::RegexBuilder;
5use serde_json::Value;
6use uuid::Uuid;
7
8use super::config::JsonParserConfig;
9use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
10
11fn extract_tool_call_blocks(
20 input: &str,
21 start_tokens: &[String],
22 end_tokens: &[String],
23) -> Vec<String> {
24 let mut blocks = Vec::new();
25
26 let individual_start_tokens: Vec<&String> = start_tokens
28 .iter()
29 .filter(|t| t.contains("tool_call_begin") || t.contains("tool▁call▁begin"))
30 .collect();
31
32 let individual_end_tokens: Vec<&String> = end_tokens
33 .iter()
34 .filter(|t| t.contains("tool_call_end") || t.contains("tool▁call▁end"))
35 .collect();
36
37 for start_token in individual_start_tokens.iter() {
39 for end_token in individual_end_tokens.iter() {
40 if start_token.is_empty() || end_token.is_empty() {
41 continue;
42 }
43
44 let escaped_start = regex::escape(start_token);
46 let escaped_end = regex::escape(end_token);
47 let pattern = format!(r"{}(.*?){}", escaped_start, escaped_end);
48
49 if let Ok(regex) = RegexBuilder::new(&pattern)
50 .dot_matches_new_line(true)
51 .build()
52 {
53 for capture in regex.captures_iter(input) {
54 if let Some(matched) = capture.get(1) {
55 let content = matched.as_str();
57 if !content.trim().is_empty() {
58 blocks.push(content.to_string());
59 }
60 }
61 }
62
63 if !blocks.is_empty() {
65 return blocks;
66 }
67 }
68 }
69 }
70
71 blocks
72}
73
74fn parse_single_tool_call(block: &str, separator_tokens: &[String]) -> Option<(String, Value)> {
78 for sep_token in separator_tokens.iter() {
80 if sep_token.is_empty() {
81 continue;
82 }
83
84 if let Some((name_part, args_part)) = block.split_once(sep_token) {
85 let function_name = name_part.trim();
86 let args_str = args_part.trim();
87
88 if function_name.is_empty() || function_name.contains(['{', '}', '[', ']']) {
90 continue;
91 }
92
93 if let Ok(arguments) = serde_json::from_str::<Value>(args_str) {
96 return Some((function_name.to_string(), arguments));
97 }
98
99 let normalized = args_str
102 .lines()
103 .map(|line| line.trim_start())
104 .collect::<Vec<_>>()
105 .join(" ");
106
107 if let Ok(arguments) = serde_json::from_str::<Value>(&normalized) {
108 return Some((function_name.to_string(), arguments));
109 }
110 }
111 }
112
113 None
114}
115
116pub fn parse_tool_calls_deepseek_v3_1(
117 message: &str,
118 config: &JsonParserConfig,
119) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
120 let trimmed = message.trim();
123
124 if trimmed.is_empty() {
126 return Ok((vec![], Some(String::new())));
127 }
128
129 let has_end_token = config
140 .tool_call_end_tokens
141 .iter()
142 .any(|token| !token.is_empty() && trimmed.contains(token));
143 if !has_end_token {
144 return Ok((vec![], Some(trimmed.to_string())));
145 }
146
147 let mut tool_call_start_tokens = config.tool_call_start_tokens.clone();
148 tool_call_start_tokens.extend(vec!["<|tool▁call▁begin|>".to_string()]);
149 let mut tool_call_end_tokens = config.tool_call_end_tokens.clone();
150 tool_call_end_tokens.extend(vec!["<|tool▁call▁end|>".to_string()]);
151 let separator_tokens = &config.tool_call_separator_tokens;
152
153 if tool_call_start_tokens.is_empty() || separator_tokens.is_empty() {
155 return Ok((vec![], Some(trimmed.to_string())));
156 }
157
158 if !detect_tool_call_start_deepseek_v3_1(trimmed, config) {
160 return Ok((vec![], Some(trimmed.to_string())));
161 }
162
163 let wrapper_tokens: Vec<&String> = tool_call_start_tokens
166 .iter()
167 .filter(|t| t.contains("tool_calls_begin") || t.contains("tool▁calls▁begin"))
168 .collect();
169
170 let normal_text = if !wrapper_tokens.is_empty() {
171 wrapper_tokens
172 .iter()
173 .find_map(|token| {
174 trimmed
175 .find(token.as_str())
176 .map(|idx| trimmed[..idx].to_string())
177 })
178 .unwrap_or_else(String::new)
179 } else {
180 tool_call_start_tokens
182 .iter()
183 .filter(|token| !token.is_empty())
184 .find_map(|token| trimmed.find(token).map(|idx| trimmed[..idx].to_string()))
185 .unwrap_or_else(String::new)
186 };
187
188 let blocks = extract_tool_call_blocks(trimmed, &tool_call_start_tokens, &tool_call_end_tokens);
190
191 if blocks.is_empty() {
192 return Ok((vec![], Some(trimmed.to_string())));
194 }
195
196 let mut tool_calls: Vec<ToolCallResponse> = Vec::new();
198 for block in blocks {
199 if let Some((function_name, arguments)) = parse_single_tool_call(&block, separator_tokens) {
200 tool_calls.push(ToolCallResponse {
201 id: format!("call-{}", Uuid::new_v4()),
202 tp: ToolCallType::Function,
203 function: CalledFunction {
204 name: function_name,
205 arguments: serde_json::to_string(&arguments)?,
206 },
207 });
208 }
209 }
210
211 if tool_calls.is_empty() {
213 return Ok((vec![], Some(trimmed.to_string())));
214 }
215
216 Ok((tool_calls, Some(normal_text)))
217}
218
219pub fn detect_tool_call_start_deepseek_v3_1(chunk: &str, config: &JsonParserConfig) -> bool {
220 let trimmed = chunk.trim();
221 if trimmed.is_empty() {
222 return false;
223 }
224
225 let has_complete_token = config
227 .tool_call_start_tokens
228 .iter()
229 .any(|token| !token.is_empty() && trimmed.contains(token));
230
231 if has_complete_token {
232 return true;
233 }
234
235 config.tool_call_start_tokens.iter().any(|token| {
238 if token.is_empty() {
239 return false;
240 }
241 for i in 1..=token.chars().count() {
244 if let Some(prefix) = token.chars().take(i).collect::<String>().get(..) {
245 let prefix_str = &prefix[..prefix.len()];
246 if trimmed == prefix_str || trimmed.ends_with(prefix_str) {
247 return true;
248 }
249 }
250 }
251 false
252 })
253}
254
#[cfg(test)]
mod tests {
    use super::super::config::ToolCallConfig;
    use super::*;

    /// Runs the parser on `text` with the stock DeepSeek V3.1 configuration.
    fn parse(text: &str) -> (Vec<ToolCallResponse>, Option<String>) {
        let config = ToolCallConfig::deepseek_v3_1().json;
        parse_tool_calls_deepseek_v3_1(text, &config).unwrap()
    }

    /// Returns the function name and the decoded JSON arguments of `call`.
    fn name_and_args(call: &ToolCallResponse) -> (String, serde_json::Value) {
        let args: serde_json::Value = serde_json::from_str(&call.function.arguments).unwrap();
        (call.function.name.clone(), args)
    }

    // Two well-formed calls and no leading text.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_basic() {
        let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Tokyo"}<|tool▁call▁end|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Paris"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#;
        let (calls, content) = parse(text);

        assert_eq!(content, Some(String::new()));
        assert_eq!(calls.len(), 2);

        let (name, args) = name_and_args(&calls[0]);
        assert_eq!(name, "get_current_weather");
        assert_eq!(args["location"], "Tokyo");

        let (name, args) = name_and_args(&calls[1]);
        assert_eq!(name, "get_current_weather");
        assert_eq!(args["location"], "Paris");
    }

    // Text in front of the wrapper token is returned as normal content.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_with_normal_text() {
        let text = r#"The following tool call retrieves weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "New York"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#;
        let (calls, content) = parse(text);

        assert_eq!(
            content,
            Some("The following tool call retrieves weather information: ".to_string())
        );
        assert_eq!(calls.len(), 1);

        let (name, args) = name_and_args(&calls[0]);
        assert_eq!(name, "get_current_weather");
        assert_eq!(args["location"], "New York");
    }

    // Without the wrapper start token the message is passed through untouched.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_without_tool_call_start_token() {
        let text = r#"<|tool▁call▁begin|>get_current_weather宽带}{location": "Tokyo"}<|tool▁call▁end|><|tool▁calls▁end|>"#;
        let (calls, content) = parse(text);

        assert_eq!(content, Some(text.to_string()));
        assert_eq!(calls.len(), 0);
    }

    // Three calls, each carrying several arguments of mixed types.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_with_multi_tool_calls_with_multiple_args() {
        let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#;
        let (calls, content) = parse(text);

        assert_eq!(content, Some(String::new()));
        assert_eq!(calls.len(), 3);

        let (name, args) = name_and_args(&calls[0]);
        assert_eq!(name, "get_current_weather");
        assert_eq!(args["location"], "Berlin");
        assert_eq!(args["units"], "metric");

        let (name, args) = name_and_args(&calls[1]);
        assert_eq!(name, "get_weather_forecast");
        assert_eq!(args["location"], "Berlin");
        assert_eq!(args["days"], 7);
        assert_eq!(args["units"], "imperial");

        let (name, args) = name_and_args(&calls[2]);
        assert_eq!(name, "get_air_quality");
        assert_eq!(args["location"], "Berlin");
        assert_eq!(args["radius"], 50);
    }

    // A malformed block yields no calls and the message comes back as text.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_with_invalid_json() {
        let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather}{location": "Tokyo"}<|tool▁call▁end|><|tool▁calls▁end|>"#;
        let (calls, content) = parse(text);

        assert_eq!(content, Some(text.trim().to_string()));
        assert_eq!(calls.len(), 0);
    }

    // Several malformed blocks after leading text: still nothing is parsed.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_with_multi_tool_calls_with_normal_text() {
        let text = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}{location": "Tokyo"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast宽带}{location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality宽带}{location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|>"#;
        let (calls, content) = parse(text);

        assert_eq!(content, Some(text.trim().to_string()));
        assert_eq!(calls.len(), 0);
    }

    // Arguments wrapped across lines (even inside string literals) must parse.
    #[test]
    fn test_parse_tool_calls_deepseek_v3_1_with_multiline_json() {
        let text = r#"I'll help you understand this codebase. Let me start by exploring the structure and key
 files to provide you with a comprehensive
 explanation.<|tool▁calls▁begin|><|tool▁call▁begin|>TodoWrite<|tool▁sep|>{"todos":
 [{"content": "Explore the root directory structure", "status": "in_progress", "activeForm":
 "Exploring the root directory structure"}, {"content": "Examine package.json and
 configuration files", "status": "pending", "activeForm": "Examining package.json and
 configuration files"}, {"content": "Analyze source code structure and key modules",
 "status": "pending", "activeForm": "Analyzing source code structure and key modules"},
 {"content": "Identify main entry points and architectural patterns", "status": "pending",
 "activeForm": "Identifying main entry points and architectural patterns"}, {"content":
 "Summarize the codebase purpose and functionality", "status": "pending", "activeForm":
 "Summarizing the codebase purpose and
 functionality"}]}<|tool▁call▁end|><|tool▁calls▁end|>"#;

        let (calls, normal_content) = parse(text);
        assert_eq!(calls.len(), 1);

        let call = &calls[0];
        assert_eq!(call.tp, ToolCallType::Function);
        let (name, args) = name_and_args(call);
        assert_eq!(name, "TodoWrite");

        let todos = args["todos"].as_array().unwrap();
        assert_eq!(todos.len(), 5);

        // (index, field, expected value) for the entries that are spot-checked.
        let expectations = [
            (0, "content", "Explore the root directory structure"),
            (0, "status", "in_progress"),
            (0, "activeForm", "Exploring the root directory structure"),
            (1, "content", "Examine package.json and configuration files"),
            (1, "status", "pending"),
            (4, "content", "Summarize the codebase purpose and functionality"),
            (4, "status", "pending"),
        ];
        for (index, field, expected) in expectations {
            assert_eq!(todos[index][field], expected);
        }

        assert_eq!(
            normal_content,
            Some("I'll help you understand this codebase. Let me start by exploring the structure and key\n files to provide you with a comprehensive\n explanation.".to_string())
        );
    }
}
403
#[cfg(test)]
mod detect_parser_tests {
    use super::super::config::ToolCallConfig;
    use super::*;

    /// Runs the detector on `chunk` with the stock DeepSeek V3.1 configuration.
    fn detect(chunk: &str) -> bool {
        let config = ToolCallConfig::deepseek_v3_1().json;
        detect_tool_call_start_deepseek_v3_1(chunk, &config)
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token() {
        let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(detect(text));
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_without_tool_call_start_token() {
        let text = r#"<|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(!detect(text));
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_chunk_with_tool_call_start_token_in_middle() {
        let text = r#"The following tool calls retrieve weather information: <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather宽带}"#;
        assert!(detect(text));
    }

    #[test]
    fn test_detect_tool_call_start_deepseek_v3_1_partial_tokens() {
        // Chunks that end in a prefix of a start token must be flagged.
        let partial_starts = [
            ("<", "'<' should be detected as potential start"),
            ("<|", "'<|' should be detected as potential start"),
            ("<|tool", "'<|tool' should be detected as potential start"),
            (
                "<|tool▁calls",
                "'<|tool▁calls' should be detected as potential start",
            ),
        ];
        for (chunk, message) in partial_starts {
            assert!(detect(chunk), "{}", message);
        }

        // Ordinary text must not be flagged.
        let plain_text = [
            ("hello world", "'hello world' should not be detected"),
            ("xyz", "'xyz' should not be detected"),
        ];
        for (chunk, message) in plain_text {
            assert!(!detect(chunk), "{}", message);
        }
    }
}