reasoning_parser/parsers/
base.rs1use crate::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
5
6#[derive(Debug, Clone)]
11pub struct BaseReasoningParser {
12 config: ParserConfig,
13 in_reasoning: bool,
14 buffer: String,
15 stripped_think_start: bool,
16 model_type: String,
17}
18
19impl BaseReasoningParser {
20 pub fn new(config: ParserConfig) -> Self {
22 let in_reasoning = config.always_in_reasoning;
23 Self {
24 config,
25 in_reasoning,
26 buffer: String::new(),
27 stripped_think_start: false,
28 model_type: "base".to_string(),
29 }
30 }
31
32 pub fn with_model_type(mut self, model_type: String) -> Self {
34 self.model_type = model_type;
35 self
36 }
37
38 fn is_partial_token(&self, text: &str) -> bool {
40 (self.config.think_start_token.starts_with(text) && self.config.think_start_token != text)
41 || (self.config.think_end_token.starts_with(text)
42 && self.config.think_end_token != text)
43 }
44}
45
46impl ReasoningParser for BaseReasoningParser {
47 fn detect_and_parse_reasoning(&mut self, text: &str) -> Result<ParserResult, ParseError> {
48 if text.len() > self.config.max_buffer_size {
50 return Err(ParseError::BufferOverflow(text.len()));
51 }
52
53 let in_reasoning = self.in_reasoning || text.contains(&self.config.think_start_token);
54
55 if !in_reasoning {
56 return Ok(ParserResult::normal(text.to_string()));
57 }
58
59 let processed_text = text
61 .replace(&self.config.think_start_token, "")
62 .trim()
63 .to_string();
64
65 if !processed_text.contains(&self.config.think_end_token) {
66 return Ok(ParserResult::reasoning(processed_text));
68 }
69
70 let splits: Vec<&str> = processed_text
72 .splitn(2, &self.config.think_end_token)
73 .collect();
74 let reasoning_text = (*splits.first().unwrap_or(&"")).to_string();
75 let normal_text = splits
76 .get(1)
77 .map(|s| s.trim().to_string())
78 .unwrap_or_default();
79
80 Ok(ParserResult::new(normal_text, reasoning_text))
81 }
82
83 fn parse_reasoning_streaming_incremental(
84 &mut self,
85 text: &str,
86 ) -> Result<ParserResult, ParseError> {
87 if self.buffer.len() + text.len() > self.config.max_buffer_size {
89 return Err(ParseError::BufferOverflow(self.buffer.len() + text.len()));
90 }
91
92 self.buffer.push_str(text);
94 let mut current_text = self.buffer.clone();
95
96 if self.is_partial_token(¤t_text) {
98 return Ok(ParserResult::default());
99 }
100
101 if !self.stripped_think_start && current_text.contains(&self.config.think_start_token) {
103 current_text = current_text.replace(&self.config.think_start_token, "");
104 self.buffer.clone_from(¤t_text);
105 self.stripped_think_start = true;
106 self.in_reasoning = true;
107 }
108
109 let think_end_idx = if self.in_reasoning {
111 current_text
112 .find(&self.config.think_end_token)
113 .unwrap_or(current_text.len())
114 } else {
115 current_text.len()
116 };
117
118 if self.in_reasoning && think_end_idx < current_text.len() {
119 let reasoning_text = ¤t_text[..think_end_idx];
120 self.buffer.clear();
121 self.in_reasoning = false;
122 let start_idx = think_end_idx + self.config.think_end_token.len();
123 let normal_text = if start_idx < current_text.len() {
124 ¤t_text[start_idx..]
125 } else {
126 ""
127 };
128 return Ok(ParserResult::new(
129 normal_text.to_string(),
130 reasoning_text.trim().to_string(),
131 ));
132 }
133
134 if self.in_reasoning && self.config.stream_reasoning {
136 let reasoning_text = current_text;
138 self.buffer.clear();
139 Ok(ParserResult::reasoning(reasoning_text))
140 } else if !self.in_reasoning {
141 let normal_text = current_text;
145 self.buffer.clear();
146 Ok(ParserResult::normal(normal_text))
147 } else {
148 Ok(ParserResult::default())
150 }
151 }
152
153 fn reset(&mut self) {
154 self.in_reasoning = self.config.always_in_reasoning;
155 self.buffer.clear();
156 self.stripped_think_start = false;
157 }
158
159 fn mark_reasoning_started(&mut self) {
160 self.in_reasoning = true;
161 }
162
163 fn mark_think_start_stripped(&mut self) {
164 self.stripped_think_start = true;
165 }
166
167 fn model_type(&self) -> &str {
168 &self.model_type
169 }
170
171 fn is_in_reasoning(&self) -> bool {
172 self.in_reasoning
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    use crate::traits::DEFAULT_MAX_BUFFER_SIZE;

    /// Builds a parser that uses `<think>` / `</think>` and the default buffer limit.
    fn create_test_parser(
        always_in_reasoning: bool,
        stream_reasoning: bool,
    ) -> BaseReasoningParser {
        BaseReasoningParser::new(ParserConfig {
            think_start_token: "<think>".to_string(),
            think_end_token: "</think>".to_string(),
            stream_reasoning,
            max_buffer_size: DEFAULT_MAX_BUFFER_SIZE,
            always_in_reasoning,
        })
    }

    /// Builds an otherwise default-configured parser limited to a 10-byte buffer.
    fn create_small_buffer_parser() -> BaseReasoningParser {
        BaseReasoningParser::new(ParserConfig {
            max_buffer_size: 10,
            ..Default::default()
        })
    }

    #[test]
    fn test_detect_and_parse_reasoning() {
        let mut parser = create_test_parser(false, true);
        let parsed = parser
            .detect_and_parse_reasoning("<think>with reasoning</think> and more text.")
            .unwrap();

        assert_eq!(parsed.normal_text, "and more text.");
        assert_eq!(parsed.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_detect_and_parse_no_reasoning() {
        let mut parser = create_test_parser(false, true);
        let parsed = parser
            .detect_and_parse_reasoning("This is a test without reasoning.")
            .unwrap();

        assert_eq!(parsed.normal_text, "This is a test without reasoning.");
        assert_eq!(parsed.reasoning_text, "");
    }

    #[test]
    fn test_detect_and_parse_truncated_reasoning() {
        let mut parser = create_test_parser(false, true);
        let parsed = parser
            .detect_and_parse_reasoning("<think>with truncated reasoning")
            .unwrap();

        assert_eq!(parsed.normal_text, "");
        assert_eq!(parsed.reasoning_text, "with truncated reasoning");
    }

    #[test]
    fn test_parse_streaming_partial_token() {
        let mut parser = create_test_parser(false, true);
        // A lone prefix of the start token must be held back, not emitted.
        let parsed = parser
            .parse_reasoning_streaming_incremental("<thi")
            .unwrap();

        assert_eq!(parsed.normal_text, "");
        assert_eq!(parsed.reasoning_text, "");
    }

    #[test]
    fn test_parse_streaming_complete() {
        let mut parser = create_test_parser(false, true);
        let parsed = parser
            .parse_reasoning_streaming_incremental("<think>with reasoning</think> and more text.")
            .unwrap();

        // Streaming keeps the leading space of the normal text.
        assert_eq!(parsed.normal_text, " and more text.");
        assert_eq!(parsed.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_parse_streaming_no_end_token() {
        let mut parser = create_test_parser(true, true);
        let parsed = parser
            .parse_reasoning_streaming_incremental("<think>with reasoning")
            .unwrap();

        assert_eq!(parsed.normal_text, "");
        assert_eq!(parsed.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_always_in_reasoning_true() {
        let mut parser = create_test_parser(true, true);
        // No tags at all: the text is still classified as reasoning.
        let parsed = parser
            .detect_and_parse_reasoning("no think tags here")
            .unwrap();

        assert_eq!(parsed.normal_text, "");
        assert_eq!(parsed.reasoning_text, "no think tags here");
    }

    #[test]
    fn test_buffer_loss_bug_fix() {
        let mut parser = create_test_parser(false, true);

        // "</" is a prefix of "</think>", so it is buffered.
        let first = parser.parse_reasoning_streaming_incremental("</").unwrap();
        assert_eq!(first.normal_text, "");
        assert_eq!(first.reasoning_text, "");

        // The follow-up shows it was not a token: the held-back text must reappear.
        let second = parser
            .parse_reasoning_streaming_incremental("answer")
            .unwrap();
        assert_eq!(second.normal_text, "</answer");
        assert_eq!(second.reasoning_text, "");
    }

    #[test]
    fn test_streaming_with_stream_reasoning_enabled() {
        let mut parser = create_test_parser(false, true);

        let first = parser
            .parse_reasoning_streaming_incremental("<think>reasoning ")
            .unwrap();
        assert_eq!(first.normal_text, "");
        assert_eq!(first.reasoning_text, "reasoning ");

        let second = parser
            .parse_reasoning_streaming_incremental("content ")
            .unwrap();
        assert_eq!(second.normal_text, "");
        assert_eq!(second.reasoning_text, "content ");

        let third = parser
            .parse_reasoning_streaming_incremental("more</think> normal")
            .unwrap();
        assert_eq!(third.normal_text, " normal");
        assert_eq!(third.reasoning_text, "more");
    }

    #[test]
    fn test_reset_state() {
        let mut parser = create_test_parser(false, true);

        parser
            .parse_reasoning_streaming_incremental("<think>reasoning</think> normal")
            .unwrap();

        parser.reset();

        assert!(!parser.in_reasoning);
        assert!(parser.buffer.is_empty());
        assert!(!parser.stripped_think_start);
    }

    #[test]
    fn test_buffer_overflow_detect_and_parse() {
        let mut parser = create_small_buffer_parser();

        let oversized = "a".repeat(20);
        let outcome = parser.detect_and_parse_reasoning(&oversized);

        assert!(outcome.is_err());
        match outcome {
            Err(ParseError::BufferOverflow(size)) => {
                assert_eq!(size, 20);
            }
            _ => panic!("Expected BufferOverflow error"),
        }
    }

    #[test]
    fn test_buffer_overflow_streaming() {
        let mut parser = create_small_buffer_parser();

        let first = parser.parse_reasoning_streaming_incremental("<thi");
        assert!(first.is_ok());
        assert_eq!(first.unwrap().normal_text, "");

        let second = parser.parse_reasoning_streaming_incremental("this_is_too_large");
        assert!(second.is_err());
        match second {
            Err(ParseError::BufferOverflow(size)) => {
                // 4 bytes from the first chunk plus 17 from the second.
                assert_eq!(size, 21);
            }
            _ => panic!("Expected BufferOverflow error"),
        }
    }
}