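//! `vtcode_core/llm/providers/reasoning.rs`
//!
//! Helpers for buffering streamed reasoning text and for extracting reasoning
//! segments from JSON payloads and from tag-delimited model output.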

use serde_json::Value;
2
/// Accumulates streamed reasoning chunks into a single string.
///
/// The buffer remembers the most recently accepted chunk so that an
/// immediately repeated, byte-identical chunk is dropped instead of being
/// appended twice.
#[derive(Debug, Default, Clone)]
pub struct ReasoningBuffer {
    /// Concatenation of every accepted chunk, in arrival order.
    text: String,
    /// The last chunk that was accepted by [`ReasoningBuffer::push`].
    last_chunk: Option<String>,
}

impl ReasoningBuffer {
    /// Appends `chunk` to the buffer and returns an owned copy of it.
    ///
    /// Returns `None` (and leaves the buffer untouched) when `chunk` is empty
    /// or is identical to the chunk accepted immediately before it.
    ///
    /// NOTE(review): the duplicate check cannot tell a re-sent chunk from a
    /// token that legitimately repeats back-to-back; both are suppressed.
    #[inline]
    pub fn push(&mut self, chunk: &str) -> Option<String> {
        if chunk.is_empty() || self.last_chunk.as_deref() == Some(chunk) {
            return None;
        }

        self.text.push_str(chunk);

        // Overwrite the remembered chunk in place so its allocation is reused
        // across calls instead of being replaced on every push.
        let remembered = self.last_chunk.get_or_insert_with(String::new);
        remembered.clear();
        remembered.push_str(chunk);

        Some(chunk.to_owned())
    }

    /// Consumes the buffer and returns the accumulated text with surrounding
    /// whitespace removed, or `None` when nothing but whitespace was collected.
    pub fn finalize(self) -> Option<String> {
        let trimmed = self.text.trim();
        if trimmed.is_empty() {
            return None;
        }
        if trimmed.len() == self.text.len() {
            // Nothing to trim: hand back the existing allocation.
            return Some(self.text);
        }
        Some(trimmed.to_owned())
    }
}
35
36pub fn clean_reasoning_text(text: &str) -> String {
37    vtcode_commons::formatting::clean_reasoning_text(text)
38}
39
/// Object keys probed first when walking a JSON payload for reasoning text.
///
/// Keys are probed in the order listed, so this order determines the order in
/// which segments from a single object are collected.
const PRIMARY_TEXT_KEYS: &[&str] = &[
    "text",
    "content",
    "reasoning",
    "thought",
    "thinking",
    "value",
];

/// Object keys probed only when none of [`PRIMARY_TEXT_KEYS`] is present.
const SECONDARY_COLLECTION_KEYS: &[&str] = &[
    "messages",
    "parts",
    "items",
    "entries",
    "steps",
    "segments",
    "records",
    "output",
    "outputs",
    "logs",
];

/// Lower-case tag names whose content is treated as reasoning.
const REASONING_TAGS: &[&str] = &["think", "thinking", "reasoning", "analysis", "thought"];

/// Lower-case tag names whose content is treated as answer text.
const ANSWER_TAGS: &[&str] = &["answer", "final"];
55
/// How the content of a recognised tag is routed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TagCategory {
    /// Content becomes a reasoning segment.
    Reasoning,
    /// Content is kept as part of the cleaned answer text.
    Answer,
}

/// A recognised opening tag located inside a lower-cased copy of the input.
#[derive(Debug)]
struct ParsedTag<'a> {
    /// Tag name, borrowed from the lower-cased text.
    name: &'a str,
    /// Byte index of the `>` that terminates the opening tag.
    end_index: usize,
    /// Whether the tag wraps reasoning or answer content.
    category: TagCategory,
}
67
/// One piece of extracted reasoning text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReasoningSegment {
    /// The reasoning text itself.
    pub text: String,
    /// Name of the tag the text was found in, when it came from tagged markup.
    pub stage: Option<String>,
}

impl ReasoningSegment {
    /// Builds a segment from anything convertible into a `String` plus an
    /// optional stage label.
    pub fn new(text: impl Into<String>, stage: Option<String>) -> Self {
        let text = text.into();
        Self { text, stage }
    }
}
82
83pub fn extract_reasoning_trace(value: &Value) -> Option<String> {
84    let mut segments = Vec::new();
85    collect_reasoning_segments(value, &mut segments);
86    let combined: Vec<String> = segments.into_iter().map(|s| s.text).collect();
87    let combined = combined.join("\n");
88    let trimmed = combined.trim();
89    if trimmed.is_empty() {
90        None
91    } else {
92        Some(trimmed.to_string())
93    }
94}
95
96fn collect_reasoning_segments(value: &Value, segments: &mut Vec<ReasoningSegment>) {
97    match value {
98        Value::Null => {}
99        Value::Bool(_) | Value::Number(_) => {}
100        Value::String(text) => {
101            let (mut tagged_segments, cleaned) = split_reasoning_from_text(text);
102
103            if !tagged_segments.is_empty() {
104                for segment in tagged_segments.drain(..) {
105                    push_unique_segment(segments, segment);
106                }
107                if let Some(cleaned_text) = cleaned {
108                    let trimmed = cleaned_text.trim();
109                    if !trimmed.is_empty() {
110                        push_unique_segment(segments, ReasoningSegment::new(trimmed, None));
111                    }
112                }
113                return;
114            }
115
116            let trimmed = text.trim();
117            if trimmed.is_empty() {
118                return;
119            }
120
121            push_unique_segment(segments, ReasoningSegment::new(trimmed, None));
122        }
123        Value::Array(items) => {
124            for item in items {
125                collect_reasoning_segments(item, segments);
126            }
127        }
128        Value::Object(map) => {
129            let mut matched_key = false;
130            for key in PRIMARY_TEXT_KEYS {
131                if let Some(nested) = map.get(*key) {
132                    collect_reasoning_segments(nested, segments);
133                    matched_key = true;
134                }
135            }
136
137            if !matched_key {
138                for key in SECONDARY_COLLECTION_KEYS {
139                    if let Some(nested) = map.get(*key) {
140                        collect_reasoning_segments(nested, segments);
141                        matched_key = true;
142                    }
143                }
144            }
145
146            if !matched_key {
147                for nested in map.values() {
148                    if matches!(nested, Value::Array(_) | Value::Object(_)) {
149                        collect_reasoning_segments(nested, segments);
150                    }
151                }
152            }
153        }
154    }
155}
156
157fn push_unique_segment(segments: &mut Vec<ReasoningSegment>, segment: ReasoningSegment) {
158    if segment.text.trim().is_empty() {
159        return;
160    }
161
162    if segments
163        .last()
164        .map(|last| last.text == segment.text && last.stage == segment.stage)
165        .unwrap_or(false)
166    {
167        return;
168    }
169
170    segments.push(segment);
171}
172
173fn parse_start_tag<'a>(lower: &'a str, start: usize) -> Option<ParsedTag<'a>> {
174    let bytes = lower.as_bytes();
175    let mut index = start + 1;
176
177    if index >= lower.len() {
178        return None;
179    }
180
181    match bytes[index] {
182        b'/' | b'!' | b'?' => return None,
183        _ => {}
184    }
185
186    while index < lower.len() && bytes[index].is_ascii_whitespace() {
187        index += 1;
188    }
189
190    if index >= lower.len() {
191        return None;
192    }
193
194    let name_start = index;
195    while index < lower.len() {
196        let ch = bytes[index];
197        if ch == b'>' || ch.is_ascii_whitespace() {
198            break;
199        }
200        index += 1;
201    }
202
203    if index == name_start {
204        return None;
205    }
206
207    let mut end_index = index;
208    while end_index < lower.len() && bytes[end_index] != b'>' {
209        end_index += 1;
210    }
211
212    if end_index >= lower.len() {
213        return None;
214    }
215
216    let name = &lower[name_start..index];
217    let category = if REASONING_TAGS.contains(&name) {
218        TagCategory::Reasoning
219    } else if ANSWER_TAGS.contains(&name) {
220        TagCategory::Answer
221    } else {
222        return None;
223    };
224
225    Some(ParsedTag {
226        name,
227        end_index,
228        category,
229    })
230}
231
232pub fn split_reasoning_from_text(text: &str) -> (Vec<ReasoningSegment>, Option<String>) {
233    if text.trim().is_empty() {
234        return (Vec::new(), None);
235    }
236
237    let lower = text.to_ascii_lowercase();
238    let mut segments: Vec<ReasoningSegment> = Vec::new();
239    let mut cleaned = String::new();
240    let mut modified = false;
241    let mut index = 0usize;
242
243    while index < text.len() {
244        let Some(relative) = lower[index..].find('<') else {
245            cleaned.push_str(&text[index..]);
246            break;
247        };
248
249        let open_index = index + relative;
250        cleaned.push_str(&text[index..open_index]);
251
252        if let Some(tag) = parse_start_tag(&lower, open_index) {
253            let content_start = tag.end_index + 1;
254            let close_sequence = format!("</{}>", tag.name);
255
256            if let Some(relative_close) = lower[content_start..].find(&close_sequence) {
257                let content_end = content_start + relative_close;
258                let inner = &text[content_start..content_end];
259
260                match tag.category {
261                    TagCategory::Reasoning => {
262                        modified = true;
263                        let (nested_segments, nested_cleaned) = split_reasoning_from_text(inner);
264
265                        if nested_segments.is_empty() {
266                            let trimmed = inner.trim();
267                            if !trimmed.is_empty() {
268                                // Use the tag name as the stage
269                                push_unique_segment(
270                                    &mut segments,
271                                    ReasoningSegment::new(trimmed, Some(tag.name.to_owned())),
272                                );
273                            }
274                        } else {
275                            for segment in nested_segments {
276                                push_unique_segment(&mut segments, segment);
277                            }
278                            if let Some(cleaned_inner) = nested_cleaned {
279                                let trimmed = cleaned_inner.trim();
280                                if !trimmed.is_empty() {
281                                    push_unique_segment(
282                                        &mut segments,
283                                        ReasoningSegment::new(trimmed, Some(tag.name.to_owned())),
284                                    );
285                                }
286                            }
287                        }
288                    }
289                    TagCategory::Answer => {
290                        modified = true;
291                        let (nested_segments, nested_cleaned) = split_reasoning_from_text(inner);
292                        for segment in nested_segments {
293                            push_unique_segment(&mut segments, segment);
294                        }
295                        if let Some(cleaned_inner) = nested_cleaned {
296                            cleaned.push_str(&cleaned_inner);
297                        } else {
298                            let trimmed = inner.trim();
299                            if !trimmed.is_empty() {
300                                cleaned.push_str(trimmed);
301                            }
302                        }
303                    }
304                }
305
306                index = content_end + close_sequence.len();
307                continue;
308            }
309        }
310
311        cleaned.push('<');
312        index = open_index + 1;
313    }
314
315    if !modified {
316        return (segments, None);
317    }
318
319    let output = if cleaned.trim().is_empty() {
320        None
321    } else {
322        Some(cleaned)
323    };
324
325    (segments, output)
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    // --- extract_reasoning_trace -------------------------------------------

    #[test]
    fn extracts_text_from_string() {
        let value = Value::String("  sample reasoning  ".to_string());
        let extracted = extract_reasoning_trace(&value);
        assert_eq!(extracted, Some("sample reasoning".to_string()));
    }

    #[test]
    fn extracts_text_from_nested_array() {
        let value = serde_json::json!([
            { "type": "thinking", "text": "step one" },
            { "type": "thinking", "text": "step two" }
        ]);
        let extracted = extract_reasoning_trace(&value);
        assert_eq!(extracted, Some("step one\nstep two".to_string()));
    }

    #[test]
    fn deduplicates_adjacent_segments() {
        let value = Value::Array(vec![
            Value::String("repeat".to_string()),
            Value::String("repeat".to_string()),
            Value::String("unique".to_string()),
        ]);
        let extracted = extract_reasoning_trace(&value);
        assert_eq!(extracted, Some("repeat\nunique".to_string()));
    }

    // --- split_reasoning_from_text -----------------------------------------

    #[test]
    fn extracts_reasoning_from_think_markup() {
        let source = "<think>first step</think>\n<answer>final output</answer>";
        let (segments, cleaned) = split_reasoning_from_text(source);
        assert_eq!(
            segments,
            vec![ReasoningSegment::new(
                "first step",
                Some("think".to_string())
            )]
        );
        assert_eq!(cleaned, Some("\nfinal output".to_string()));
    }

    #[test]
    fn handles_nested_reasoning_markup() {
        let source = "<think><analysis>deep dive</analysis> summary</think>";
        let (segments, cleaned) = split_reasoning_from_text(source);
        assert_eq!(
            segments,
            vec![
                ReasoningSegment::new("deep dive", Some("analysis".to_string())),
                ReasoningSegment::new("summary", Some("think".to_string()))
            ]
        );
        assert!(cleaned.is_none());
    }

    #[test]
    fn matches_tags_case_insensitively() {
        let (segments, cleaned) = split_reasoning_from_text("<THINK>Step</THINK>done");
        assert_eq!(
            segments,
            vec![ReasoningSegment::new("Step", Some("think".to_string()))]
        );
        assert_eq!(cleaned, Some("done".to_string()));
    }

    #[test]
    fn leaves_unclosed_tags_untouched() {
        let (segments, cleaned) = split_reasoning_from_text("<think>partial");
        assert!(segments.is_empty());
        assert!(cleaned.is_none());
    }

    // --- clean_reasoning_text ----------------------------------------------

    #[test]
    fn cleans_blank_lines_from_reasoning() {
        let input = "line1\n\n\nline2\n\n\n\nline3";
        let cleaned = clean_reasoning_text(input);
        assert_eq!(cleaned, "line1\nline2\nline3");
    }

    #[test]
    fn cleans_leading_and_trailing_blank_lines() {
        let input = "\n\nline1\n\n\n\n";
        let cleaned = clean_reasoning_text(input);
        assert_eq!(cleaned, "line1");
    }

    #[test]
    fn handles_empty_and_whitespace_only() {
        assert_eq!(clean_reasoning_text(""), "");
        assert_eq!(clean_reasoning_text("   "), "");
        assert_eq!(clean_reasoning_text("\n\n\n"), "");
    }

    #[test]
    fn removes_single_blank_lines() {
        let input = "line1\n\nline2";
        let cleaned = clean_reasoning_text(input);
        assert_eq!(cleaned, "line1\nline2");
    }

    #[test]
    fn handles_mixed_whitespace_lines() {
        let input = "  line1  \n   \n  \n  line2  \n\t\n     \nline3";
        let cleaned = clean_reasoning_text(input);
        assert_eq!(cleaned, "  line1\n  line2\nline3");
    }

    // --- ReasoningBuffer ---------------------------------------------------

    #[test]
    fn reasoning_buffer_preserves_leading_whitespace_spacing() {
        let mut buffer = ReasoningBuffer::default();
        let first = buffer.push("Hello");
        assert_eq!(first.as_deref(), Some("Hello"));

        let second = buffer.push(" world");
        assert_eq!(second.as_deref(), Some(" world"));

        let third = buffer.push("!");
        assert_eq!(third.as_deref(), Some("!"));

        let finalized = buffer.finalize();
        assert_eq!(finalized.as_deref(), Some("Hello world!"));
    }

    #[test]
    fn reasoning_buffer_keeps_subword_tokens_together() {
        let mut buffer = ReasoningBuffer::default();
        for chunk in ["Andre", "j", " Kar", "pathy", "'s"] {
            buffer.push(chunk);
        }

        let finalized = buffer.finalize();
        assert_eq!(finalized.as_deref(), Some("Andrej Karpathy's"));
    }
}
467}