dynamo_parsers/reasoning/
base_parser.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{ParserResult, ReasoningParser};
5
6#[derive(Default, Debug, Clone)]
7pub struct BasicReasoningParser {
8    think_start_token: String,
9    think_end_token: String,
10    _in_reasoning: bool,
11    stream_reasoning: bool,
12    _buffer: String,
13    stripped_think_start: bool,
14}
15
16impl BasicReasoningParser {
17    pub fn new(
18        think_start_token: String,
19        think_end_token: String,
20        force_reasoning: bool,
21        stream_reasoning: bool,
22    ) -> Self {
23        Self {
24            think_start_token,
25            think_end_token,
26            _in_reasoning: force_reasoning,
27            stream_reasoning,
28            _buffer: String::new(),
29            stripped_think_start: false,
30        }
31    }
32}
33
34impl ReasoningParser for BasicReasoningParser {
35    fn detect_and_parse_reasoning(&mut self, text: &str, _token_ids: &[u32]) -> ParserResult {
36        let in_reasoning = self._in_reasoning || text.contains(&self.think_start_token);
37        if !in_reasoning {
38            return ParserResult {
39                normal_text: text.to_string(),
40                reasoning_text: String::new(),
41            };
42        }
43
44        // The text is considered to be in a reasoning block.
45        let processed_text = text.replace(&self.think_start_token, "").trim().to_string();
46
47        if !processed_text.contains(&self.think_end_token) {
48            // Assume reasoning was truncated before `think_end_token`
49            return ParserResult {
50                normal_text: String::new(),
51                reasoning_text: processed_text,
52            };
53        }
54
55        // Extract reasoning content
56        let splits: Vec<&str> = processed_text.splitn(2, &self.think_end_token).collect();
57        let reasoning_text = splits.first().unwrap_or(&"").to_string();
58        let normal_text = splits
59            .get(1)
60            .map(|s| s.trim().to_string())
61            .unwrap_or_default();
62
63        ParserResult {
64            normal_text,
65            reasoning_text,
66        }
67    }
68
69    fn parse_reasoning_streaming_incremental(
70        &mut self,
71        text: &str,
72        _token_ids: &[u32],
73    ) -> ParserResult {
74        // Incrementally parse the streaming text
75        self._buffer.push_str(text);
76        let mut current_text = self._buffer.to_string();
77        // If the current text is a prefix of the think token, keep buffering
78
79        if self.think_start_token.starts_with(&current_text)
80            && self.think_start_token.as_str() != current_text.as_str()
81        {
82            return ParserResult {
83                normal_text: String::new(),
84                reasoning_text: String::new(),
85            };
86        }
87        if self.think_end_token.starts_with(&current_text)
88            && self.think_end_token.as_str() != current_text.as_str()
89        {
90            return ParserResult {
91                normal_text: String::new(),
92                reasoning_text: String::new(),
93            };
94        }
95
96        // Strip `<think>` token if present
97        if !self.stripped_think_start && current_text.contains(&self.think_start_token) {
98            current_text = current_text.replace(&self.think_start_token, "");
99            self._buffer = current_text.to_string();
100            self.stripped_think_start = true;
101            self._in_reasoning = true;
102        }
103        // Handle end of reasoning block
104        let mut think_end_idx = current_text.len();
105        if self._in_reasoning {
106            think_end_idx = current_text
107                .find(&self.think_end_token)
108                .unwrap_or(current_text.len());
109        }
110        if self._in_reasoning && think_end_idx < current_text.len() {
111            let reasoning_text = &current_text[..think_end_idx];
112            self._buffer.clear();
113            self._in_reasoning = false;
114            let start_idx = think_end_idx + self.think_end_token.len();
115            let normal_text = if start_idx < current_text.len() {
116                &current_text[start_idx..]
117            } else {
118                ""
119            };
120            return ParserResult {
121                normal_text: normal_text.to_string(),
122                reasoning_text: reasoning_text.to_string(),
123            };
124        }
125        // Continue with reasoning content
126        if self._in_reasoning && self.stream_reasoning {
127            // Stream the content immediately
128            let reasoning_text = current_text;
129            self._buffer.clear();
130            ParserResult {
131                normal_text: String::new(),
132                reasoning_text,
133            }
134        } else if !self._in_reasoning {
135            // If we're not in a reasoning block return as normal text
136            let normal_text = current_text;
137            self._buffer.clear();
138            ParserResult {
139                normal_text,
140                reasoning_text: String::new(),
141            }
142        } else {
143            // If we are in a reasoning block but no end token is found, return the current buffer
144            ParserResult {
145                normal_text: String::new(),
146                reasoning_text: String::new(),
147            }
148        }
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parser using the `<think>` / `</think>` marker pair.
    fn new_parser(force_reasoning: bool, stream_reasoning: bool) -> BasicReasoningParser {
        BasicReasoningParser::new(
            "<think>".to_string(),
            "</think>".to_string(),
            force_reasoning,
            stream_reasoning,
        )
    }

    /// Runs the one-shot API on `input` and checks both parts of the result.
    #[track_caller]
    fn check_detect(
        parser: &mut BasicReasoningParser,
        input: &str,
        expected_normal: &str,
        expected_reasoning: &str,
    ) {
        let outcome = parser.detect_and_parse_reasoning(input, &[]);
        assert_eq!(outcome.normal_text, expected_normal);
        assert_eq!(outcome.reasoning_text, expected_reasoning);
    }

    /// Feeds each `(chunk, expected_normal, expected_reasoning)` step to the
    /// streaming API in order and checks the result returned for that chunk.
    #[track_caller]
    fn check_stream(parser: &mut BasicReasoningParser, steps: &[(&str, &str, &str)]) {
        for &(chunk, expected_normal, expected_reasoning) in steps {
            let outcome = parser.parse_reasoning_streaming_incremental(chunk, &[]);
            assert_eq!(outcome.normal_text, expected_normal);
            assert_eq!(outcome.reasoning_text, expected_reasoning);
        }
    }

    #[test]
    fn test_detect_and_parse_reasoning_reasoning() {
        check_detect(
            &mut new_parser(false, true),
            "<think>with reasoning</think> and more text.",
            "and more text.",
            "with reasoning",
        );
    }

    #[test]
    fn test_detect_and_parse_reasoning_reasoning_no_reasoning() {
        check_detect(
            &mut new_parser(false, true),
            "This is a test without reasoning.",
            "This is a test without reasoning.",
            "",
        );
    }

    #[test]
    fn test_detect_and_parse_reasoning_reasoning_truncated_reasoning() {
        check_detect(
            &mut new_parser(false, true),
            "<think>with truncated reasoning",
            "",
            "with truncated reasoning",
        );
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental() {
        // A partial start marker yields nothing yet.
        check_stream(&mut new_parser(false, true), &[("<thi", "", "")]);
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental_complete() {
        check_stream(
            &mut new_parser(false, true),
            &[(
                "<think>with reasoning</think> and more text.",
                " and more text.",
                "with reasoning",
            )],
        );
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental_no_end_token() {
        check_stream(
            &mut new_parser(true, true),
            &[("<think>with reasoning", "", "with reasoning")],
        );
    }

    #[test]
    fn test_detect_and_parse_reasoning_multiple_reasoning_blocks() {
        // Only the first block is split out; later start markers are simply removed.
        check_detect(
            &mut new_parser(false, true),
            "<think>first reasoning</think> middle <think>second reasoning</think> end",
            "middle second reasoning</think> end",
            "first reasoning",
        );
    }

    #[test]
    fn test_streaming_multiple_reasoning_blocks() {
        check_stream(
            &mut new_parser(false, false),
            &[
                (
                    "<think>first reasoning</think> middle",
                    " middle",
                    "first reasoning",
                ),
                // The basic parser handles a single reasoning block; a second
                // one passes through as normal text.
                (
                    " <think>second reasoning</think> end",
                    " <think>second reasoning</think> end",
                    "",
                ),
            ],
        );
    }

    #[test]
    fn test_partial_token_matching_opening_tag() {
        check_stream(
            &mut new_parser(false, true),
            &[
                // Partial opening tag.
                ("<th", "", ""),
                // Rest of the opening tag plus content.
                (
                    "ink>reasoning content</think> normal text",
                    " normal text",
                    "reasoning content",
                ),
            ],
        );
    }

    #[test]
    fn test_partial_token_matching_closing_tag() {
        check_stream(
            &mut new_parser(false, false),
            &[
                // Complete opening tag, content, partial closing tag.
                ("<think>reasoning content</th", "", ""),
                // Rest of the closing tag.
                ("ink> normal text", " normal text", "reasoning content"),
            ],
        );
    }

    #[test]
    fn test_buffer_state_persistence_across_calls() {
        check_stream(
            &mut new_parser(false, false),
            &[
                // Partial opening tag.
                ("<th", "", ""),
                // Opening tag completed, reasoning begins.
                ("ink>part1 ", "", ""),
                // More reasoning content.
                ("part2 ", "", ""),
                // Reasoning ends, normal text follows.
                ("part3</think> normal", " normal", "part1 part2 part3"),
            ],
        );
    }

    #[test]
    fn test_streaming_with_stream_reasoning_enabled() {
        check_stream(
            &mut new_parser(false, true),
            &[
                // Reasoning block starts.
                ("<think>reasoning ", "", "reasoning "),
                // Reasoning keeps streaming.
                ("content ", "", "content "),
                // Reasoning block ends.
                ("more</think> normal", " normal", "more"),
            ],
        );
    }

    #[test]
    fn test_nested_reasoning_blocks() {
        // The first closing tag ends the reasoning, and every <think> tag is
        // stripped, so "<think>inner" contributes just "inner".
        check_detect(
            &mut new_parser(false, true),
            "<think>outer <think>inner</think> reasoning</think> normal",
            "reasoning</think> normal",
            "outer inner",
        );
    }

    #[test]
    fn test_malformed_missing_closing_tag() {
        check_detect(
            &mut new_parser(false, true),
            "<think>reasoning without closing tag",
            "",
            "reasoning without closing tag",
        );
    }

    #[test]
    fn test_malformed_stray_closing_tag() {
        check_detect(
            &mut new_parser(false, true),
            "normal text</think> more normal",
            "normal text</think> more normal",
            "",
        );
    }

    #[test]
    fn test_malformed_multiple_opening_tags() {
        // All opening tags are removed and the first closing tag is used.
        check_detect(
            &mut new_parser(false, true),
            "<think>first <think>second reasoning</think> normal",
            "normal",
            "first second reasoning",
        );
    }

    #[test]
    fn test_empty_reasoning_block() {
        check_detect(
            &mut new_parser(false, true),
            "<think></think> normal text",
            "normal text",
            "",
        );
    }

    #[test]
    fn test_whitespace_only_reasoning_block() {
        // The reasoning part is empty once the text has been trimmed.
        check_detect(
            &mut new_parser(false, true),
            "<think>   \n\t  </think> normal text",
            "normal text",
            "",
        );
    }

    #[test]
    fn test_force_reasoning_mode() {
        check_detect(
            &mut new_parser(true, true),
            "no think tags here",
            "",
            "no think tags here",
        );
    }

    #[test]
    fn test_streaming_reset_state_after_complete_block() {
        check_stream(
            &mut new_parser(false, false),
            &[
                // A complete reasoning block.
                ("<think>reasoning</think> normal", " normal", "reasoning"),
                // Plain text afterwards is unaffected by the earlier block.
                (" more normal text", " more normal text", ""),
                // The basic parser expects a single reasoning block, so another
                // one is passed through untouched.
                (
                    " <think>new reasoning</think> final",
                    " <think>new reasoning</think> final",
                    "",
                ),
            ],
        );
    }
}
417}