vtcode_core/llm/providers/
reasoning.rs1use serde_json::Value;
2
/// Accumulates streamed reasoning chunks into one contiguous trace.
///
/// Chunks are appended verbatim (no separators are inserted), so sub-word tokens and
/// leading spaces supplied by the caller are preserved. A chunk that is identical to the
/// chunk accepted immediately before it is treated as a repeat and dropped.
#[derive(Debug, Default, Clone)]
pub struct ReasoningBuffer {
    /// Concatenation of every accepted chunk, in arrival order.
    text: String,
    /// The most recently accepted chunk, kept only to detect an immediate repeat.
    last_chunk: Option<String>,
}

impl ReasoningBuffer {
    /// Appends `chunk` to the buffer.
    ///
    /// Returns `Some(chunk)` (as an owned `String`) when the chunk was accepted, or `None`
    /// when it was ignored because it is empty or equal to the previously accepted chunk.
    #[inline]
    pub fn push(&mut self, chunk: &str) -> Option<String> {
        // Empty chunks carry no content; an exact repeat of the last chunk is dropped.
        if chunk.is_empty() || self.last_chunk.as_deref() == Some(chunk) {
            return None;
        }

        self.text.push_str(chunk);
        self.last_chunk = Some(chunk.to_owned());

        Some(chunk.to_owned())
    }

    /// Consumes the buffer and returns the accumulated text with surrounding whitespace
    /// trimmed, or `None` when nothing but whitespace was collected.
    pub fn finalize(self) -> Option<String> {
        let trimmed = self.text.trim();
        (!trimmed.is_empty()).then(|| trimmed.to_owned())
    }
}
35
36pub fn clean_reasoning_text(text: &str) -> String {
37 vtcode_commons::formatting::clean_reasoning_text(text)
38}
39
/// Object keys that `collect_reasoning_segments` looks up first when it meets a JSON object.
/// Every key present is visited, in this order.
const PRIMARY_TEXT_KEYS: &[&str] = &[
    "text",
    "content",
    "reasoning",
    "thought",
    "thinking",
    "value",
];

/// Object keys that are consulted only when none of `PRIMARY_TEXT_KEYS` is present.
/// Every key present is visited, in this order.
const SECONDARY_COLLECTION_KEYS: &[&str] = &[
    "messages",
    "parts",
    "items",
    "entries",
    "steps",
    "segments",
    "records",
    "output",
    "outputs",
    "logs",
];
52
/// Lower-case tag names whose element content is classified as reasoning by
/// `parse_start_tag`.
const REASONING_TAGS: &[&str] = &[
    "think",
    "thinking",
    "reasoning",
    "analysis",
    "thought",
];

/// Lower-case tag names whose element content is classified as the answer by
/// `parse_start_tag`.
const ANSWER_TAGS: &[&str] = &[
    "answer",
    "final",
];
55
/// Classification of a recognized markup tag.
#[derive(Copy, Clone, Eq, PartialEq)]
enum TagCategory {
    /// The tag name is listed in `REASONING_TAGS`.
    Reasoning,
    /// The tag name is listed in `ANSWER_TAGS`.
    Answer,
}
61
62struct ParsedTag<'a> {
63 name: &'a str,
64 end_index: usize,
65 category: TagCategory,
66}
67
/// One piece of extracted reasoning text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReasoningSegment {
    /// The reasoning text itself.
    pub text: String,
    /// Name of the markup tag the text was found in, or `None` for untagged text.
    pub stage: Option<String>,
}

impl ReasoningSegment {
    /// Builds a segment from anything convertible into a `String` plus an optional stage.
    pub fn new(text: impl Into<String>, stage: Option<String>) -> Self {
        let text = text.into();
        Self { text, stage }
    }
}
82
83pub fn extract_reasoning_trace(value: &Value) -> Option<String> {
84 let mut segments = Vec::new();
85 collect_reasoning_segments(value, &mut segments);
86 let combined: Vec<String> = segments.into_iter().map(|s| s.text).collect();
87 let combined = combined.join("\n");
88 let trimmed = combined.trim();
89 if trimmed.is_empty() {
90 None
91 } else {
92 Some(trimmed.to_string())
93 }
94}
95
96fn collect_reasoning_segments(value: &Value, segments: &mut Vec<ReasoningSegment>) {
97 match value {
98 Value::Null => {}
99 Value::Bool(_) | Value::Number(_) => {}
100 Value::String(text) => {
101 let (mut tagged_segments, cleaned) = split_reasoning_from_text(text);
102
103 if !tagged_segments.is_empty() {
104 for segment in tagged_segments.drain(..) {
105 push_unique_segment(segments, segment);
106 }
107 if let Some(cleaned_text) = cleaned {
108 let trimmed = cleaned_text.trim();
109 if !trimmed.is_empty() {
110 push_unique_segment(segments, ReasoningSegment::new(trimmed, None));
111 }
112 }
113 return;
114 }
115
116 let trimmed = text.trim();
117 if trimmed.is_empty() {
118 return;
119 }
120
121 push_unique_segment(segments, ReasoningSegment::new(trimmed, None));
122 }
123 Value::Array(items) => {
124 for item in items {
125 collect_reasoning_segments(item, segments);
126 }
127 }
128 Value::Object(map) => {
129 let mut matched_key = false;
130 for key in PRIMARY_TEXT_KEYS {
131 if let Some(nested) = map.get(*key) {
132 collect_reasoning_segments(nested, segments);
133 matched_key = true;
134 }
135 }
136
137 if !matched_key {
138 for key in SECONDARY_COLLECTION_KEYS {
139 if let Some(nested) = map.get(*key) {
140 collect_reasoning_segments(nested, segments);
141 matched_key = true;
142 }
143 }
144 }
145
146 if !matched_key {
147 for nested in map.values() {
148 if matches!(nested, Value::Array(_) | Value::Object(_)) {
149 collect_reasoning_segments(nested, segments);
150 }
151 }
152 }
153 }
154 }
155}
156
157fn push_unique_segment(segments: &mut Vec<ReasoningSegment>, segment: ReasoningSegment) {
158 if segment.text.trim().is_empty() {
159 return;
160 }
161
162 if segments
163 .last()
164 .map(|last| last.text == segment.text && last.stage == segment.stage)
165 .unwrap_or(false)
166 {
167 return;
168 }
169
170 segments.push(segment);
171}
172
173fn parse_start_tag<'a>(lower: &'a str, start: usize) -> Option<ParsedTag<'a>> {
174 let bytes = lower.as_bytes();
175 let mut index = start + 1;
176
177 if index >= lower.len() {
178 return None;
179 }
180
181 match bytes[index] {
182 b'/' | b'!' | b'?' => return None,
183 _ => {}
184 }
185
186 while index < lower.len() && bytes[index].is_ascii_whitespace() {
187 index += 1;
188 }
189
190 if index >= lower.len() {
191 return None;
192 }
193
194 let name_start = index;
195 while index < lower.len() {
196 let ch = bytes[index];
197 if ch == b'>' || ch.is_ascii_whitespace() {
198 break;
199 }
200 index += 1;
201 }
202
203 if index == name_start {
204 return None;
205 }
206
207 let mut end_index = index;
208 while end_index < lower.len() && bytes[end_index] != b'>' {
209 end_index += 1;
210 }
211
212 if end_index >= lower.len() {
213 return None;
214 }
215
216 let name = &lower[name_start..index];
217 let category = if REASONING_TAGS.contains(&name) {
218 TagCategory::Reasoning
219 } else if ANSWER_TAGS.contains(&name) {
220 TagCategory::Answer
221 } else {
222 return None;
223 };
224
225 Some(ParsedTag {
226 name,
227 end_index,
228 category,
229 })
230}
231
232pub fn split_reasoning_from_text(text: &str) -> (Vec<ReasoningSegment>, Option<String>) {
233 if text.trim().is_empty() {
234 return (Vec::new(), None);
235 }
236
237 let lower = text.to_ascii_lowercase();
238 let mut segments: Vec<ReasoningSegment> = Vec::new();
239 let mut cleaned = String::new();
240 let mut modified = false;
241 let mut index = 0usize;
242
243 while index < text.len() {
244 let Some(relative) = lower[index..].find('<') else {
245 cleaned.push_str(&text[index..]);
246 break;
247 };
248
249 let open_index = index + relative;
250 cleaned.push_str(&text[index..open_index]);
251
252 if let Some(tag) = parse_start_tag(&lower, open_index) {
253 let content_start = tag.end_index + 1;
254 let close_sequence = format!("</{}>", tag.name);
255
256 if let Some(relative_close) = lower[content_start..].find(&close_sequence) {
257 let content_end = content_start + relative_close;
258 let inner = &text[content_start..content_end];
259
260 match tag.category {
261 TagCategory::Reasoning => {
262 modified = true;
263 let (nested_segments, nested_cleaned) = split_reasoning_from_text(inner);
264
265 if nested_segments.is_empty() {
266 let trimmed = inner.trim();
267 if !trimmed.is_empty() {
268 push_unique_segment(
270 &mut segments,
271 ReasoningSegment::new(trimmed, Some(tag.name.to_owned())),
272 );
273 }
274 } else {
275 for segment in nested_segments {
276 push_unique_segment(&mut segments, segment);
277 }
278 if let Some(cleaned_inner) = nested_cleaned {
279 let trimmed = cleaned_inner.trim();
280 if !trimmed.is_empty() {
281 push_unique_segment(
282 &mut segments,
283 ReasoningSegment::new(trimmed, Some(tag.name.to_owned())),
284 );
285 }
286 }
287 }
288 }
289 TagCategory::Answer => {
290 modified = true;
291 let (nested_segments, nested_cleaned) = split_reasoning_from_text(inner);
292 for segment in nested_segments {
293 push_unique_segment(&mut segments, segment);
294 }
295 if let Some(cleaned_inner) = nested_cleaned {
296 cleaned.push_str(&cleaned_inner);
297 } else {
298 let trimmed = inner.trim();
299 if !trimmed.is_empty() {
300 cleaned.push_str(trimmed);
301 }
302 }
303 }
304 }
305
306 index = content_end + close_sequence.len();
307 continue;
308 }
309 }
310
311 cleaned.push('<');
312 index = open_index + 1;
313 }
314
315 if !modified {
316 return (segments, None);
317 }
318
319 let output = if cleaned.trim().is_empty() {
320 None
321 } else {
322 Some(cleaned)
323 };
324
325 (segments, output)
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // --- extract_reasoning_trace -------------------------------------------------------

    #[test]
    fn extracts_text_from_string() {
        let value = Value::from(" sample reasoning ");
        assert_eq!(
            extract_reasoning_trace(&value).as_deref(),
            Some("sample reasoning")
        );
    }

    #[test]
    fn extracts_text_from_nested_array() {
        let value = json!([
            { "type": "thinking", "text": "step one" },
            { "type": "thinking", "text": "step two" }
        ]);
        assert_eq!(
            extract_reasoning_trace(&value).as_deref(),
            Some("step one\nstep two")
        );
    }

    #[test]
    fn deduplicates_adjacent_segments() {
        let value = json!(["repeat", "repeat", "unique"]);
        assert_eq!(
            extract_reasoning_trace(&value).as_deref(),
            Some("repeat\nunique")
        );
    }

    // --- split_reasoning_from_text -----------------------------------------------------

    #[test]
    fn extracts_reasoning_from_think_markup() {
        let (segments, cleaned) =
            split_reasoning_from_text("<think>first step</think>\n<answer>final output</answer>");

        let expected = vec![ReasoningSegment::new(
            "first step",
            Some("think".to_string()),
        )];
        assert_eq!(segments, expected);
        assert_eq!(cleaned.as_deref(), Some("\nfinal output"));
    }

    #[test]
    fn handles_nested_reasoning_markup() {
        let (segments, cleaned) =
            split_reasoning_from_text("<think><analysis>deep dive</analysis> summary</think>");

        let expected = vec![
            ReasoningSegment::new("deep dive", Some("analysis".to_string())),
            ReasoningSegment::new("summary", Some("think".to_string())),
        ];
        assert_eq!(segments, expected);
        assert!(cleaned.is_none());
    }

    // --- clean_reasoning_text ----------------------------------------------------------

    #[test]
    fn cleans_blank_lines_from_reasoning() {
        assert_eq!(
            clean_reasoning_text("line1\n\n\nline2\n\n\n\nline3"),
            "line1\nline2\nline3"
        );
    }

    #[test]
    fn cleans_leading_and_trailing_blank_lines() {
        assert_eq!(clean_reasoning_text("\n\nline1\n\n\n\n"), "line1");
    }

    #[test]
    fn handles_empty_and_whitespace_only() {
        for input in ["", " ", "\n\n\n"] {
            assert_eq!(clean_reasoning_text(input), "");
        }
    }

    #[test]
    fn removes_single_blank_lines() {
        assert_eq!(clean_reasoning_text("line1\n\nline2"), "line1\nline2");
    }

    #[test]
    fn handles_mixed_whitespace_lines() {
        assert_eq!(
            clean_reasoning_text(" line1 \n \n \n line2 \n\t\n \nline3"),
            " line1\n line2\nline3"
        );
    }

    // --- ReasoningBuffer ---------------------------------------------------------------

    #[test]
    fn reasoning_buffer_preserves_leading_whitespace_spacing() {
        let mut buffer = ReasoningBuffer::default();

        // Each accepted chunk is echoed back unchanged.
        for chunk in ["Hello", " world", "!"] {
            assert_eq!(buffer.push(chunk).as_deref(), Some(chunk));
        }

        assert_eq!(buffer.finalize().as_deref(), Some("Hello world!"));
    }

    #[test]
    fn reasoning_buffer_keeps_subword_tokens_together() {
        let mut buffer = ReasoningBuffer::default();
        for chunk in ["Andre", "j", " Kar", "pathy", "'s"] {
            buffer.push(chunk);
        }

        assert_eq!(buffer.finalize().as_deref(), Some("Andrej Karpathy's"));
    }
}