/// Byte limit (64 KiB) that [`ThinkStripper::feed`] applies to the text it
/// keeps buffered between calls.
pub const THINK_BUF_MAX: usize = 64 * 1024;

/// Incremental filter state for removing `<think>` / `<thinking>` blocks from
/// text that arrives in chunks.
///
/// The type derives `Debug` and `Clone` so it can be logged and snapshotted
/// like any other plain-data value.
#[derive(Debug, Clone)]
pub struct ThinkStripper {
    /// Input received so far that has been neither emitted nor discarded.
    carry: String,
    /// `true` while the scanner is between an opening tag and its closing tag.
    inside: bool,
}
14
15impl Default for ThinkStripper {
16 fn default() -> Self {
17 Self::new()
18 }
19}
20
21impl ThinkStripper {
22 pub fn new() -> Self {
23 Self {
24 carry: String::new(),
25 inside: false,
26 }
27 }
28
29 pub fn buffered_bytes(&self) -> usize {
30 self.carry.len()
31 }
32
33 pub fn reset(&mut self) {
40 self.carry.clear();
41 self.inside = false;
42 }
43
44 pub fn feed(&mut self, delta: &str) -> String {
46 if self.carry.len() + delta.len() > THINK_BUF_MAX {
50 let mut flushed = std::mem::take(&mut self.carry);
51 flushed.push_str(delta);
52 if self.inside {
53 return String::new(); }
55 return flushed;
56 }
57
58 self.carry.push_str(delta);
59 let mut out = String::new();
60 self.drain_into(&mut out);
61 out
62 }
63
64 fn drain_into(&mut self, out: &mut String) {
65 loop {
66 if self.inside {
67 match find_close_tag(&self.carry) {
69 Some((_close_start, close_end)) => {
70 self.carry.drain(..close_end);
71 self.inside = false;
72 }
74 None => {
75 let keep = 11.min(self.carry.len());
79 let drop_end = self.carry.len() - keep;
80 let safe = prev_boundary(&self.carry, drop_end);
81 self.carry.drain(..safe);
82 return;
83 }
84 }
85 } else {
86 match find_open_tag(&self.carry) {
87 TagScan::None => {
88 out.push_str(&self.carry);
89 self.carry.clear();
90 return;
91 }
92 TagScan::Complete { start, end } => {
93 out.push_str(&self.carry[..start]);
94 self.carry.drain(..end);
95 self.inside = true;
96 }
97 TagScan::PartialAt(pos) => {
98 out.push_str(&self.carry[..pos]);
99 self.carry.drain(..pos);
100 return;
101 }
102 }
103 }
104 }
105 }
106}
107
/// Result of scanning a buffer for an opening `<think>` / `<thinking>` tag.
///
/// Derives the usual value-type traits so results can be compared with
/// `assert_eq!` and printed while debugging.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TagScan {
    /// No opening tag, complete or partial, was found.
    None,
    /// A complete opening tag occupies the byte range `start..end`.
    Complete { start: usize, end: usize },
    /// Something that may become an opening tag begins at this byte offset,
    /// but more input is needed to decide.
    PartialAt(usize),
}
113
114fn find_open_tag(s: &str) -> TagScan {
117 let mut search_start = 0;
118 while let Some(lt) = s[search_start..].find('<') {
119 let abs = search_start + lt;
120 let rest = &s[abs..];
121 if let Some(end) = parse_open_tag(rest) {
122 return TagScan::Complete {
123 start: abs,
124 end: abs + end,
125 };
126 }
127 let lower: String = rest.chars().map(|c| c.to_ascii_lowercase()).collect();
129 let could_be_partial = lower.len() < 9 && "<thinking".starts_with(lower.as_str())
130 || lower.len() < 6 && "<think".starts_with(lower.as_str())
131 || lower.starts_with("<think") && !lower.contains('>')
132 || lower.starts_with("<thinking") && !lower.contains('>');
133 if could_be_partial {
134 return TagScan::PartialAt(abs);
135 }
136 search_start = abs + 1;
137 }
138 TagScan::None
139}
140
/// Tries to read a complete opening tag at the very start of `s`.
///
/// The tag name is `<thinking` or `<think`, matched without regard to ASCII
/// case. It must be followed either directly by `>` or by ASCII whitespace
/// and, somewhere later, a `>`. On success the byte length of the whole tag
/// (through the `>`) is returned; otherwise `None`.
fn parse_open_tag(s: &str) -> Option<usize> {
    // Longer name first, so `<thinking>` is not read as `<think` + "ing>".
    const TAG_NAMES: [&str; 2] = ["<thinking", "<think"];

    let name_len = TAG_NAMES
        .iter()
        .find(|&&name| {
            // `get` is `None` when `s` is too short or the cut would split a
            // multi-byte character; in both cases the name cannot match.
            matches!(s.get(..name.len()), Some(head) if head.eq_ignore_ascii_case(name))
        })?
        .len();

    let tail = &s[name_len..];
    match tail.chars().next()? {
        '>' => Some(name_len + 1),
        c if c.is_ascii_whitespace() => tail.find('>').map(|gt| name_len + gt + 1),
        _ => None,
    }
}
172
/// Locates the earliest `</think>` or `</thinking>` in `s`, ignoring ASCII
/// case.
///
/// Returns the tag's `(start, end)` byte offsets, or `None` when neither
/// closing tag is present.
fn find_close_tag(s: &str) -> Option<(usize, usize)> {
    // ASCII lowercasing never changes byte lengths, so offsets found in the
    // lowered copy are valid offsets into `s` as well.
    let haystack = s.to_ascii_lowercase();
    ["</thinking>", "</think>"]
        .iter()
        .filter_map(|&tag| haystack.find(tag).map(|at| (at, at + tag.len())))
        // The two tags can never begin at the same offset, so tie-breaking
        // does not come into play.
        .min_by_key(|&(at, _)| at)
}
187
/// Returns the largest byte offset not greater than `idx` that falls on a
/// character boundary of `s`.
///
/// Offsets beyond the end of `s` resolve to `s.len()`.
fn prev_boundary(s: &str, idx: usize) -> usize {
    // Offset 0 is always a boundary, so the search cannot come up empty; the
    // fallback only satisfies the type.
    (0..=idx)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without any markup comes out unchanged.
    #[test]
    fn no_tags_passes_through() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("hello world");
        assert_eq!(emitted, "hello world");
    }

    /// A whole block delivered in a single chunk is removed.
    #[test]
    fn complete_block_in_one_feed() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("a<think>secret</think>b");
        assert_eq!(emitted, "ab");
    }

    /// An opening tag cut in half by a chunk boundary is still recognised.
    #[test]
    fn tag_split_across_feeds() {
        let mut stripper = ThinkStripper::new();
        let first = stripper.feed("hello <thi");
        assert_eq!(first, "hello ");
        let second = stripper.feed("nk>secret</think> world");
        assert_eq!(second, " world");
    }

    /// Multi-byte characters inside and after a block do not cause a panic
    /// and the visible ones are emitted intact.
    #[test]
    fn utf8_boundary_at_feed_edge_no_panic() {
        let mut stripper = ThinkStripper::new();
        let first = stripper.feed("abc<thi");
        assert_eq!(first, "abc");
        let second = stripper.feed("nk>密</think>你好");
        assert_eq!(second, "你好");
    }

    /// Tag names match regardless of ASCII case.
    #[test]
    fn case_insensitive_tag() {
        let inputs = ["<THINK>a</THINK>b", "<Think>a</Think>b"];
        for input in inputs.iter() {
            let mut stripper = ThinkStripper::new();
            assert_eq!(stripper.feed(input), "b");
        }
    }

    /// The longer `<thinking>` spelling is handled as well.
    #[test]
    fn thinking_tag_also_stripped() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("<thinking>a</thinking>b");
        assert_eq!(emitted, "b");
    }

    /// Attributes on the opening tag do not stop it from being recognised.
    #[test]
    fn tag_with_attributes() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("<think key=\"v\">a</think>b");
        assert_eq!(emitted, "b");
    }

    /// After an unclosed block far larger than the cap, the buffer stays
    /// within `THINK_BUF_MAX`.
    #[test]
    fn unclosed_block_capped_at_buf_limit() {
        let mut stripper = ThinkStripper::new();
        let mut input = String::from("<think>");
        input.push_str(&"x".repeat(100_000));
        let _ = stripper.feed(&input);
        assert!(stripper.buffered_bytes() <= THINK_BUF_MAX);
    }

    /// Stray angle brackets that are not tags are left alone.
    #[test]
    fn literal_angle_bracket_outside_tag_preserved() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("a < b > c");
        assert_eq!(emitted, "a < b > c");
    }

    /// Several blocks in one chunk are all removed.
    #[test]
    fn multiple_blocks() {
        let mut stripper = ThinkStripper::new();
        let emitted = stripper.feed("a<think>x</think>b<think>y</think>c");
        assert_eq!(emitted, "abc");
    }

    /// A stream cut off mid-block leaves the stripper inside the block, so
    /// later text is swallowed until `reset` is called.
    #[test]
    fn reset_clears_stuck_inside_state() {
        let mut stripper = ThinkStripper::new();
        let _ = stripper.feed("prefix <think>still thinking when we got cut");

        let swallowed = stripper.feed("hello from the next model");
        assert_eq!(
            swallowed, "",
            "without reset, text leaks through the stuck inside=true state",
        );

        stripper.reset();
        let visible = stripper.feed("hello from the next model");
        assert_eq!(visible, "hello from the next model");
    }

    /// Resetting a brand-new stripper changes nothing.
    #[test]
    fn reset_from_pristine_state_is_a_noop() {
        let mut stripper = ThinkStripper::new();
        stripper.reset();
        let emitted = stripper.feed("plain text");
        assert_eq!(emitted, "plain text");
    }

    /// `reset` also throws away a half-received tag held in the buffer.
    #[test]
    fn reset_clears_partial_carry_at_feed_boundary() {
        let mut stripper = ThinkStripper::new();
        let first = stripper.feed("hello <thi");
        assert_eq!(first, "hello ");
        assert!(stripper.buffered_bytes() > 0);

        stripper.reset();
        assert_eq!(stripper.buffered_bytes(), 0);

        let after = stripper.feed("not a tag: <3");
        assert_eq!(after, "not a tag: <3");
    }
}