reasoning_parser/parsers/
base.rs1use crate::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
5
6#[derive(Debug, Clone)]
11pub struct BaseReasoningParser {
12 config: ParserConfig,
13 in_reasoning: bool,
14 buffer: String,
15 stripped_think_start: bool,
16 model_type: String,
17}
18
19impl BaseReasoningParser {
20 pub fn new(config: ParserConfig) -> Self {
22 let in_reasoning = config.initial_in_reasoning;
23 Self {
24 config,
25 in_reasoning,
26 buffer: String::new(),
27 stripped_think_start: false,
28 model_type: "base".to_string(),
29 }
30 }
31
32 pub fn with_model_type(mut self, model_type: String) -> Self {
34 self.model_type = model_type;
35 self
36 }
37
38 fn is_partial_token(&self, text: &str) -> bool {
40 (self.config.think_start_token.starts_with(text) && self.config.think_start_token != text)
41 || (self.config.think_end_token.starts_with(text)
42 && self.config.think_end_token != text)
43 }
44}
45
46impl ReasoningParser for BaseReasoningParser {
47 fn detect_and_parse_reasoning(&mut self, text: &str) -> Result<ParserResult, ParseError> {
48 if text.len() > self.config.max_buffer_size {
50 return Err(ParseError::BufferOverflow(text.len()));
51 }
52
53 let in_reasoning = self.in_reasoning || text.contains(&self.config.think_start_token);
54
55 if !in_reasoning {
56 return Ok(ParserResult::normal(text.to_string()));
57 }
58
59 let processed_text = text
61 .replace(&self.config.think_start_token, "")
62 .trim()
63 .to_string();
64
65 if !processed_text.contains(&self.config.think_end_token) {
66 return Ok(ParserResult::reasoning(processed_text));
68 }
69
70 let splits: Vec<&str> = processed_text
72 .splitn(2, &self.config.think_end_token)
73 .collect();
74 let reasoning_text = (*splits.first().unwrap_or(&"")).to_string();
75 let normal_text = splits
76 .get(1)
77 .map(|s| s.trim().to_string())
78 .unwrap_or_default();
79
80 Ok(ParserResult::new(normal_text, reasoning_text))
81 }
82
83 fn parse_reasoning_streaming_incremental(
84 &mut self,
85 text: &str,
86 ) -> Result<ParserResult, ParseError> {
87 if self.buffer.len() + text.len() > self.config.max_buffer_size {
89 return Err(ParseError::BufferOverflow(self.buffer.len() + text.len()));
90 }
91
92 self.buffer.push_str(text);
94 let mut current_text = self.buffer.clone();
95
96 if self.is_partial_token(¤t_text) {
98 return Ok(ParserResult::default());
99 }
100
101 if !self.stripped_think_start && current_text.contains(&self.config.think_start_token) {
103 current_text = current_text.replace(&self.config.think_start_token, "");
104 self.buffer.clone_from(¤t_text);
105 self.stripped_think_start = true;
106 self.in_reasoning = true;
107 }
108
109 let think_end_idx = if self.in_reasoning {
111 current_text
112 .find(&self.config.think_end_token)
113 .unwrap_or(current_text.len())
114 } else {
115 current_text.len()
116 };
117
118 if self.in_reasoning && think_end_idx < current_text.len() {
119 let reasoning_text = ¤t_text[..think_end_idx];
120 self.buffer.clear();
121 self.in_reasoning = false;
122 let start_idx = think_end_idx + self.config.think_end_token.len();
123 let normal_text = if start_idx < current_text.len() {
124 ¤t_text[start_idx..]
125 } else {
126 ""
127 };
128 return Ok(ParserResult::new(
129 normal_text.to_string(),
130 reasoning_text.trim().to_string(),
131 ));
132 }
133
134 if self.in_reasoning && self.config.stream_reasoning {
136 let reasoning_text = current_text;
138 self.buffer.clear();
139 Ok(ParserResult::reasoning(reasoning_text))
140 } else if !self.in_reasoning {
141 let normal_text = current_text;
145 self.buffer.clear();
146 Ok(ParserResult::normal(normal_text))
147 } else {
148 Ok(ParserResult::default())
150 }
151 }
152
153 fn reset(&mut self) {
154 self.in_reasoning = self.config.initial_in_reasoning;
155 self.buffer.clear();
156 self.stripped_think_start = false;
157 }
158
159 fn model_type(&self) -> &str {
160 &self.model_type
161 }
162
163 fn is_in_reasoning(&self) -> bool {
164 self.in_reasoning
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::traits::DEFAULT_MAX_BUFFER_SIZE;

    /// Parser using `<think>` / `</think>` delimiters and the default buffer
    /// limit, with the two mode flags chosen by the caller.
    fn create_test_parser(
        initial_in_reasoning: bool,
        stream_reasoning: bool,
    ) -> BaseReasoningParser {
        BaseReasoningParser::new(ParserConfig {
            think_start_token: String::from("<think>"),
            think_end_token: String::from("</think>"),
            stream_reasoning,
            max_buffer_size: DEFAULT_MAX_BUFFER_SIZE,
            initial_in_reasoning,
        })
    }

    /// Parser with default settings except for a 10-byte buffer limit.
    fn create_small_buffer_parser() -> BaseReasoningParser {
        BaseReasoningParser::new(ParserConfig {
            max_buffer_size: 10,
            ..Default::default()
        })
    }

    /// Checks both halves of a parse result in one call.
    #[track_caller]
    fn assert_texts(result: &ParserResult, normal: &str, reasoning: &str) {
        assert_eq!(result.normal_text, normal);
        assert_eq!(result.reasoning_text, reasoning);
    }

    /// Checks that `result` is a buffer overflow reporting `expected_size`.
    #[track_caller]
    fn assert_overflow(result: Result<ParserResult, ParseError>, expected_size: usize) {
        assert!(result.is_err());
        match result {
            Err(ParseError::BufferOverflow(size)) => assert_eq!(size, expected_size),
            _ => panic!("Expected BufferOverflow error"),
        }
    }

    #[test]
    fn test_detect_and_parse_reasoning() {
        let parsed = create_test_parser(false, true)
            .detect_and_parse_reasoning("<think>with reasoning</think> and more text.")
            .unwrap();
        assert_texts(&parsed, "and more text.", "with reasoning");
    }

    #[test]
    fn test_detect_and_parse_no_reasoning() {
        let parsed = create_test_parser(false, true)
            .detect_and_parse_reasoning("This is a test without reasoning.")
            .unwrap();
        assert_texts(&parsed, "This is a test without reasoning.", "");
    }

    #[test]
    fn test_detect_and_parse_truncated_reasoning() {
        let parsed = create_test_parser(false, true)
            .detect_and_parse_reasoning("<think>with truncated reasoning")
            .unwrap();
        assert_texts(&parsed, "", "with truncated reasoning");
    }

    #[test]
    fn test_parse_streaming_partial_token() {
        let parsed = create_test_parser(false, true)
            .parse_reasoning_streaming_incremental("<thi")
            .unwrap();
        assert_texts(&parsed, "", "");
    }

    #[test]
    fn test_parse_streaming_complete() {
        let parsed = create_test_parser(false, true)
            .parse_reasoning_streaming_incremental("<think>with reasoning</think> and more text.")
            .unwrap();
        assert_texts(&parsed, " and more text.", "with reasoning");
    }

    #[test]
    fn test_parse_streaming_no_end_token() {
        let parsed = create_test_parser(true, true)
            .parse_reasoning_streaming_incremental("<think>with reasoning")
            .unwrap();
        assert_texts(&parsed, "", "with reasoning");
    }

    #[test]
    fn test_initial_in_reasoning_true() {
        // Already in reasoning mode, so untagged text counts as reasoning.
        let parsed = create_test_parser(true, true)
            .detect_and_parse_reasoning("no think tags here")
            .unwrap();
        assert_texts(&parsed, "", "no think tags here");
    }

    #[test]
    fn test_buffer_loss_bug_fix() {
        let mut parser = create_test_parser(false, true);

        // "</" might still become the end token, so it is held back ...
        let held_back = parser.parse_reasoning_streaming_incremental("</").unwrap();
        assert_texts(&held_back, "", "");

        // ... and must come out together with the next chunk once it is clear
        // that no token is being formed.
        let flushed = parser
            .parse_reasoning_streaming_incremental("answer")
            .unwrap();
        assert_texts(&flushed, "</answer", "");
    }

    #[test]
    fn test_streaming_with_stream_reasoning_enabled() {
        let mut parser = create_test_parser(false, true);

        let opening = parser
            .parse_reasoning_streaming_incremental("<think>reasoning ")
            .unwrap();
        assert_texts(&opening, "", "reasoning ");

        let middle = parser
            .parse_reasoning_streaming_incremental("content ")
            .unwrap();
        assert_texts(&middle, "", "content ");

        let closing = parser
            .parse_reasoning_streaming_incremental("more</think> normal")
            .unwrap();
        assert_texts(&closing, " normal", "more");
    }

    #[test]
    fn test_reset_state() {
        let mut parser = create_test_parser(false, true);

        parser
            .parse_reasoning_streaming_incremental("<think>reasoning</think> normal")
            .unwrap();
        parser.reset();

        assert!(!parser.in_reasoning);
        assert!(parser.buffer.is_empty());
        assert!(!parser.stripped_think_start);
    }

    #[test]
    fn test_buffer_overflow_detect_and_parse() {
        let mut parser = create_small_buffer_parser();

        let large_text = "a".repeat(20);
        assert_overflow(parser.detect_and_parse_reasoning(&large_text), 20);
    }

    #[test]
    fn test_buffer_overflow_streaming() {
        let mut parser = create_small_buffer_parser();

        // A partial token fits and stays buffered (4 bytes).
        let buffered = parser.parse_reasoning_streaming_incremental("<thi");
        assert!(buffered.is_ok());
        assert_eq!(buffered.unwrap().normal_text, "");

        // 4 buffered bytes + 17 new bytes exceed the 10-byte limit.
        assert_overflow(
            parser.parse_reasoning_streaming_incremental("this_is_too_large"),
            21,
        );
    }
}