/// Tag names, lowercase and without angle brackets, that mark a thinking block.
/// `classify` wraps each name as `<name>` or `</name>` before comparing it with
/// the canonicalized candidate text.
const THINKING_TAGS: &[&str] = &["think", "thinking", "thought", "reasoning", "reflection"];
/// Incremental filter that removes thinking-tag blocks from text delivered in
/// arbitrary chunks, while keeping the text of the last closed block available
/// through `take_completed_thought`.
#[derive(Debug, Default, Clone)]
pub struct ThinkingTagStreamFilter {
/// `true` while the stream position is between an opening and a closing tag.
inside: bool,
/// Characters, starting at a `<`, that may still turn out to be a tag; they
/// are held back until `classify` accepts or rejects them.
pending: String,
/// Text gathered for the block that is currently open; set to `Some` when an
/// opening tag completes and moved out when the closing tag completes.
thought_buffer: Option<String>,
/// Text of the most recently closed block, kept until it is taken by
/// `take_completed_thought` or dropped by `flush` / `reset`.
completed_thought: Option<String>,
}
impl ThinkingTagStreamFilter {
pub fn new() -> Self {
Self::default()
}
pub fn feed(&mut self, delta: &str) -> String {
let mut out = String::with_capacity(delta.len());
for ch in delta.chars() {
self.consume_char(ch, &mut out);
}
out
}
pub fn flush(&mut self) -> String {
let out = if self.inside {
self.thought_buffer.take();
String::new()
} else {
std::mem::take(&mut self.pending)
};
self.pending.clear();
self.inside = false;
self.completed_thought.take();
out
}
pub fn reset(&mut self) {
self.pending.clear();
self.inside = false;
self.thought_buffer.take();
self.completed_thought.take();
}
pub fn take_completed_thought(&mut self) -> Option<String> {
self.completed_thought.take()
}
fn consume_char(&mut self, ch: char, out: &mut String) {
if self.inside {
self.consume_inside(ch);
} else {
self.consume_outside(ch, out);
}
}
fn consume_outside(&mut self, ch: char, out: &mut String) {
if self.pending.is_empty() {
if ch == '<' {
self.pending.push(ch);
} else {
out.push(ch);
}
return;
}
self.pending.push(ch);
match classify(&self.pending, false) {
TagMatch::Complete => {
self.inside = true;
self.pending.clear();
self.thought_buffer = Some(String::new());
}
TagMatch::Possible => {}
TagMatch::No => {
out.push_str(&self.pending);
self.pending.clear();
}
}
}
fn consume_inside(&mut self, ch: char) {
if self.pending.is_empty() {
if ch == '<' {
self.pending.push(ch);
} else if let Some(ref mut buf) = self.thought_buffer {
buf.push(ch);
}
return;
}
self.pending.push(ch);
match classify(&self.pending, true) {
TagMatch::Complete => {
self.inside = false;
self.completed_thought = self.thought_buffer.take();
self.pending.clear();
}
TagMatch::Possible => {}
TagMatch::No => {
if let Some(ref mut buf) = self.thought_buffer {
buf.push_str(&self.pending);
} else {
self.thought_buffer = Some(std::mem::take(&mut self.pending));
}
self.pending.clear();
}
}
}
}
/// Result of comparing a buffered tag candidate with the known thinking tags.
enum TagMatch {
/// The candidate is exactly one of the tags.
Complete,
/// The candidate is a prefix of at least one tag, so more input is needed.
Possible,
/// The candidate cannot become any of the tags.
No,
}
/// Decides whether `buf` is, could still become, or cannot be a thinking tag.
///
/// `buf` is canonicalized first, then compared against `<name>` (or `</name>`
/// when `closing` is true) for every entry of `THINKING_TAGS`, in order. The
/// first tag that equals the candidate yields `Complete`; the first tag that
/// merely starts with it yields `Possible`; otherwise the result is `No`.
fn classify(buf: &str, closing: bool) -> TagMatch {
    let candidate = canonicalize_tag_candidate(buf);
    let opener = if closing { "</" } else { "<" };
    THINKING_TAGS
        .iter()
        .find_map(|&name| {
            let wanted = [opener, name, ">"].concat();
            if candidate == wanted {
                Some(TagMatch::Complete)
            } else if wanted.starts_with(candidate.as_str()) {
                Some(TagMatch::Possible)
            } else {
                None
            }
        })
        .unwrap_or(TagMatch::No)
}
/// Normalizes a tag candidate so it can be compared with the canonical tags.
///
/// ASCII letters are always lowercased. When the text begins with `<`, every
/// ASCII whitespace character after it is removed as well, which lets forms
/// such as `< Thought >` or `</\tthinking >` compare equal to `<thought>` and
/// `</thinking>`. Text that does not begin with `<` keeps its whitespace.
fn canonicalize_tag_candidate(buf: &str) -> String {
    if !buf.starts_with('<') {
        return buf.to_ascii_lowercase();
    }
    buf.chars()
        .filter(|c| !c.is_ascii_whitespace())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
/// Removes every thinking block from a complete piece of text.
///
/// The whole input is run through a fresh `ThinkingTagStreamFilter` and then
/// flushed, so an unclosed block drops everything after its opening tag. The
/// remaining text is returned with leading and trailing whitespace trimmed.
pub fn strip_thinking_tags(text: &str) -> String {
    let mut stream = ThinkingTagStreamFilter::new();
    let visible = stream.feed(text) + &stream.flush();
    visible.trim().to_owned()
}
// Unit tests for the streaming filter and the one-shot `strip_thinking_tags`.
#[cfg(test)]
mod tests {
use super::*;
// Feeds every chunk in order, then flushes, and returns all visible text.
// Because it ends with `flush`, any completed thought is discarded afterwards.
fn feed_all(filter: &mut ThinkingTagStreamFilter, deltas: &[&str]) -> String {
let mut out = String::new();
for d in deltas {
out.push_str(&filter.feed(d));
}
out.push_str(&filter.flush());
out
}
// Text without any tag passes through unchanged and records no thought.
#[test]
fn passes_through_when_no_tag() {
let mut f = ThinkingTagStreamFilter::new();
assert_eq!(feed_all(&mut f, &["hello", " ", "world"]), "hello world");
assert!(f.take_completed_thought().is_none());
}
// A whole block delivered in a single chunk is removed from the output.
#[test]
fn strips_complete_thought_block_in_one_delta() {
let mut f = ThinkingTagStreamFilter::new();
let visible = feed_all(&mut f, &["<thought>hidden</thought>visible"]);
assert_eq!(visible, "visible");
}
// The hidden text can be collected exactly once after the block closes.
#[test]
fn captures_completed_thought_text() {
let mut f = ThinkingTagStreamFilter::new();
let _ = f.feed("<thought>recall and frame</thought>");
assert_eq!(
f.take_completed_thought().as_deref(),
Some("recall and frame")
);
assert!(f.take_completed_thought().is_none());
}
// Opening and closing tags may be split across chunk boundaries.
#[test]
fn handles_tag_split_across_deltas() {
let mut f = ThinkingTagStreamFilter::new();
let visible = feed_all(
&mut f,
&[
"<thi", "nking", ">", "hidden ", "stuff", "</thi", "nking>", "out",
],
);
assert_eq!(visible, "out");
}
// Whitespace inside the angle brackets and mixed case are tolerated.
#[test]
fn tolerant_thought_tags_are_hidden() {
for raw in [
"< thought >hidden</ thought >visible",
"<\tthought>hidden</\tthought>visible",
"<Thinking >hidden</ Thinking >visible",
] {
let mut f = ThinkingTagStreamFilter::new();
assert_eq!(feed_all(&mut f, &[raw]), "visible", "raw={raw:?}");
}
}
// A `<` that does not begin a known tag is emitted as ordinary text.
#[test]
fn stray_lt_is_emitted_when_not_a_tag() {
let mut f = ThinkingTagStreamFilter::new();
let visible = feed_all(&mut f, &["a < b > c"]);
assert_eq!(visible, "a < b > c");
}
// With no closing tag, everything after the opening tag is dropped on flush.
#[test]
fn unclosed_tag_drops_trailing_content() {
let mut f = ThinkingTagStreamFilter::new();
let visible = feed_all(&mut f, &["before<thought>never closed"]);
assert_eq!(visible, "before");
}
// Every tag name listed here is treated as a thinking block.
#[test]
fn synonyms_recognized() {
for tag in &["think", "thinking", "thought", "reasoning", "reflection"] {
let mut f = ThinkingTagStreamFilter::new();
let raw = format!("a<{tag}>x</{tag}>b");
assert_eq!(feed_all(&mut f, &[raw.as_str()]), "ab", "tag={tag}");
}
}
// Tag names match regardless of ASCII letter case.
#[test]
fn case_insensitive_tag_recognition() {
let mut f = ThinkingTagStreamFilter::new();
let visible = feed_all(&mut f, &["<Thought>hidden</Thought>visible"]);
assert_eq!(visible, "visible");
}
// The one-shot helper removes blocks, including whitespace-padded tags.
#[test]
fn strip_thinking_tags_removes_blocks() {
assert_eq!(strip_thinking_tags("a<thought>x</thought>b"), "ab");
assert_eq!(strip_thinking_tags("<reasoning>r</reasoning>tail"), "tail");
assert_eq!(strip_thinking_tags("a< thought >x</ thought >b"), "ab");
}
// The one-shot helper drops the remainder of the text after an unclosed tag.
#[test]
fn strip_thinking_tags_handles_unclosed() {
assert_eq!(strip_thinking_tags("keep<thought>drop the rest"), "keep");
}
// `reset` abandons an open block so later text is visible again.
#[test]
fn reset_clears_state() {
let mut f = ThinkingTagStreamFilter::new();
let _ = f.feed("<thought>partial");
f.reset();
assert_eq!(feed_all(&mut f, &["fresh"]), "fresh");
assert!(f.take_completed_thought().is_none());
}
}