1use std::collections::BTreeSet;
2use std::sync::OnceLock;
3
4use regex::Regex;
5
6#[derive(Default, Clone, Debug, PartialEq, Eq)]
7pub struct VisibleTextState {
8 raw_text: String,
9 last_visible_text: String,
10}
11
12impl VisibleTextState {
13 pub fn push(&mut self, delta: &str, partial: bool) -> (String, String) {
14 self.raw_text.push_str(delta);
15 let visible_text = sanitize_visible_assistant_text(&self.raw_text, partial);
16 let visible_delta = visible_text
17 .strip_prefix(&self.last_visible_text)
18 .unwrap_or(visible_text.as_str())
19 .to_string();
20 self.last_visible_text = visible_text.clone();
21 (visible_text, visible_delta)
22 }
23
24 pub fn clear(&mut self) {
25 self.raw_text.clear();
26 self.last_visible_text.clear();
27 }
28}
29
30fn internal_block_patterns() -> &'static [Regex] {
31 static PATTERNS: OnceLock<Vec<Regex>> = OnceLock::new();
32 PATTERNS.get_or_init(|| {
33 [
34 r"(?s)<think>.*?</think>",
35 r"(?s)<think>.*$",
36 r"(?s)<\|tool_call\|>.*?</\|tool_call\|>",
37 r"(?s)<tool_call>.*?</tool_call>",
41 r"(?s)<done>.*?</done>",
42 r"(?s)<tool_result[^>]*>.*?</tool_result>",
43 r"(?s)\[result of [^\]]+\].*?\[end of [^\]]+\]",
44 r"(?m)^\s*(##DONE##|DONE|PLAN_READY)\s*$",
45 ]
46 .into_iter()
47 .map(|pattern| Regex::new(pattern).expect("valid assistant sanitization regex"))
48 .collect()
49 })
50}
51
52fn assistant_prose_regex() -> &'static Regex {
53 static RE: OnceLock<Regex> = OnceLock::new();
54 RE.get_or_init(|| {
55 Regex::new(r"(?s)<assistant_prose>\s*(.*?)\s*</assistant_prose>")
56 .expect("valid assistant_prose regex")
57 })
58}
59
60fn user_response_regex() -> &'static Regex {
61 static RE: OnceLock<Regex> = OnceLock::new();
62 RE.get_or_init(|| {
63 Regex::new(r"(?s)<user_response>\s*(.*?)\s*</user_response>")
64 .expect("valid user_response regex")
65 })
66}
67
68fn extract_user_response(text: &str) -> Option<String> {
69 let sections: Vec<String> = user_response_regex()
70 .captures_iter(text)
71 .filter_map(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
72 .filter(|section| !section.is_empty())
73 .collect();
74 if sections.is_empty() {
75 None
76 } else {
77 Some(sections.join("\n\n"))
78 }
79}
80
81fn extract_visible_prose(text: &str) -> String {
86 if let Some(user_response) = extract_user_response(text) {
87 return user_response;
88 }
89 assistant_prose_regex().replace_all(text, "$1").to_string()
90}
91
92fn json_fence_regex() -> &'static Regex {
93 static JSON_FENCE: OnceLock<Regex> = OnceLock::new();
94 JSON_FENCE
95 .get_or_init(|| Regex::new(r"(?s)```json[^\n]*\n(.*?)```").expect("valid json fence regex"))
96}
97
98fn inline_planner_json_regex() -> &'static Regex {
99 static INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
100 INLINE_PLANNER_JSON.get_or_init(|| {
101 Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*?\}"#)
102 .expect("valid inline planner json regex")
103 })
104}
105
106fn partial_inline_planner_json_regex() -> &'static Regex {
107 static PARTIAL_INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
108 PARTIAL_INLINE_PLANNER_JSON.get_or_init(|| {
109 Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*$"#)
110 .expect("valid partial inline planner json regex")
111 })
112}
113
114fn looks_like_internal_planning_json(source: &str) -> bool {
115 let trimmed = source.trim();
116 if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
117 return false;
118 }
119
120 fn collect_keys(value: &serde_json::Value, keys: &mut BTreeSet<String>) {
121 match value {
122 serde_json::Value::Object(map) => {
123 for (key, child) in map {
124 keys.insert(key.clone());
125 collect_keys(child, keys);
126 }
127 }
128 serde_json::Value::Array(items) => {
129 for item in items {
130 collect_keys(item, keys);
131 }
132 }
133 _ => {}
134 }
135 }
136
137 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(trimmed) {
138 let mut keys = BTreeSet::new();
139 collect_keys(&parsed, &mut keys);
140 let has_planner_mode = match &parsed {
141 serde_json::Value::Object(map) => map
142 .get("mode")
143 .and_then(|value| value.as_str())
144 .is_some_and(|mode| {
145 matches!(mode, "fast_execute" | "plan_then_execute" | "ask_user")
146 }),
147 _ => false,
148 };
149 let has_internal_keys = [
150 "plan",
151 "steps",
152 "tool_calls",
153 "tool_name",
154 "verification",
155 "execution_mode",
156 "required_outputs",
157 "files_to_edit",
158 "next_action",
159 "reasoning",
160 "direction",
161 "targets",
162 "tasks",
163 "unknowns",
164 ]
165 .into_iter()
166 .any(|key| keys.contains(key));
167 return has_planner_mode || has_internal_keys;
168 }
169
170 false
171}
172
173fn strip_internal_json_fences(text: &str) -> String {
174 json_fence_regex()
175 .replace_all(text, |caps: ®ex::Captures| {
176 let body = caps
177 .get(1)
178 .map(|match_| match_.as_str())
179 .unwrap_or_default();
180 if looks_like_internal_planning_json(body) {
181 String::new()
182 } else {
183 caps.get(0)
184 .map(|match_| match_.as_str().to_string())
185 .unwrap_or_default()
186 }
187 })
188 .to_string()
189}
190
/// Truncates `text` at the earliest internal block that has been opened but not yet closed.
///
/// For every known opening/closing marker pair, an opener counts as unclosed when no closing
/// marker of the same kind appears after it. The text is cut at the smallest such opener
/// position across all marker kinds, so nothing from any still-open block is returned. When
/// no block is open the text is returned unchanged.
fn strip_unclosed_internal_blocks(text: &str) -> String {
    // (opening marker, closing marker). Openers without `>` and the bracketed result markers
    // are deliberately prefixes, because the real markers carry attributes or names.
    const BLOCK_MARKERS: [(&str, &str); 6] = [
        ("<|tool_call|>", "</|tool_call|>"),
        ("<tool_call>", "</tool_call>"),
        ("<done>", "</done>"),
        ("<user_response>", "</user_response>"),
        ("[result of ", "[end of "),
        ("<tool_result", "</tool_result>"),
    ];

    let earliest_unclosed = BLOCK_MARKERS
        .iter()
        .filter_map(|(open, close)| {
            // Openers located before the last closing marker are treated as closed; the first
            // opener after it (if any) starts a block that is still open.
            let search_from = text.rfind(close).map_or(0, |idx| idx + close.len());
            text[search_from..]
                .find(open)
                .map(|offset| search_from + offset)
        })
        .min();

    match earliest_unclosed {
        Some(cut) => text[..cut].to_string(),
        None => text.to_string(),
    }
}
236
237fn strip_inline_internal_planning_json(text: &str, partial: bool) -> String {
238 let mut stripped = inline_planner_json_regex()
239 .replace_all(text, "")
240 .to_string();
241 if partial {
242 stripped = partial_inline_planner_json_regex()
243 .replace_all(&stripped, "")
244 .to_string();
245 }
246 stripped
247}
248
/// Drops a trailing fragment of `text` that could be the beginning of an internal marker.
///
/// Markers are tried in declaration order. For the first marker that has a proper, non-empty
/// prefix equal to the end of `text`, the longest such prefix is removed and the remainder is
/// returned. A complete marker at the end is not considered here. Text ending in no marker
/// prefix is returned unchanged.
fn strip_partial_marker_suffix(text: &str) -> String {
    const MARKERS: [&str; 10] = [
        "<|tool_call|>",
        "<tool_call>",
        "<assistant_prose>",
        "<user_response>",
        "<done>",
        "<tool_result",
        "[result of ",
        "##DONE##",
        "DONE",
        "PLAN_READY",
    ];

    MARKERS
        .iter()
        .find_map(|marker| {
            // Longest proper prefix first, so as much of a started marker as possible goes.
            (1..marker.len())
                .rev()
                .find_map(|len| text.strip_suffix(&marker[..len]))
        })
        .unwrap_or(text)
        .to_string()
}
272
/// Normalizes line endings and blank-line runs in text that is about to be shown.
///
/// `\r\n` becomes `\n`, every run of three or more consecutive newlines is reduced to exactly
/// two (at most one blank line between paragraphs), and leading and trailing whitespace is
/// trimmed. Other whitespace, including repeated spaces, is left untouched.
fn normalize_visible_whitespace(text: &str) -> String {
    let unified = text.replace("\r\n", "\n");

    let mut collapsed = String::with_capacity(unified.len());
    let mut newline_run = 0usize;
    for ch in unified.chars() {
        if ch == '\n' {
            newline_run += 1;
            // A single replace pass would leave long runs only partly collapsed, so the run
            // length is tracked and every newline beyond the second is dropped.
            if newline_run > 2 {
                continue;
            }
        } else {
            newline_run = 0;
        }
        collapsed.push(ch);
    }

    collapsed.trim().to_string()
}
279
280pub fn sanitize_visible_assistant_text(text: &str, partial: bool) -> String {
281 let mut sanitized = text.to_string();
282 for pattern in internal_block_patterns() {
283 sanitized = pattern.replace_all(&sanitized, "").to_string();
284 }
285 sanitized = extract_visible_prose(&sanitized);
289 sanitized = strip_internal_json_fences(&sanitized);
290 sanitized = strip_inline_internal_planning_json(&sanitized, partial);
291 if partial {
292 sanitized = strip_unclosed_internal_blocks(&sanitized);
293 sanitized = strip_partial_marker_suffix(&sanitized);
294 }
295 normalize_visible_whitespace(&sanitized)
296}
297
/// Unit tests for the streaming state and the sanitizer entry point.
#[cfg(test)]
mod tests {
    use super::{sanitize_visible_assistant_text, VisibleTextState};

    /// Plain chunks are passed through and each push reports only the appended text.
    #[test]
    fn push_emits_incremental_visible_delta_for_plain_chunks() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hello", true);
        assert_eq!(visible, "Hello");
        assert_eq!(delta, "Hello");

        let (visible, delta) = state.push(" world", true);
        assert_eq!(visible, "Hello world");
        assert_eq!(delta, " world");
    }

    /// An unterminated `<think>` block is hidden; once it closes, the text after it appears.
    #[test]
    fn push_hides_open_think_block_until_closed() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hi <think>secret", true);
        assert_eq!(visible, "Hi");
        assert_eq!(delta, "Hi");

        // Removing the block leaves the space before it and the space after it side by side,
        // and whitespace normalization does not collapse spaces.
        let (visible, delta) = state.push(" plan</think> bye", true);
        assert_eq!(visible, "Hi  bye");
        assert_eq!(delta, "  bye");
    }

    /// A chunk that is sanitized away entirely leaves the visible text unchanged, so the
    /// reported delta is empty.
    #[test]
    fn push_emits_full_visible_text_when_sanitization_shrinks_output() {
        let mut state = VisibleTextState::default();
        let (visible, _) = state.push("ok", true);
        assert_eq!(visible, "ok");

        let (visible, delta) = state.push(" <think>", true);
        assert_eq!(visible, "ok");
        assert_eq!(delta, "");
    }

    /// A trailing fragment of a marker is withheld, and the completed marker line is removed.
    #[test]
    fn push_partial_marker_suffix_is_held_back_until_resolved() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hello\n##DON", true);
        assert_eq!(visible, "Hello");
        assert_eq!(delta, "Hello");

        let (visible, delta) = state.push("E##\nmore", true);
        assert_eq!(visible, "Hello\n\nmore");
        assert_eq!(delta, "\n\nmore");
    }

    /// After `clear`, the state behaves like a freshly constructed one.
    #[test]
    fn clear_resets_streaming_state() {
        let mut state = VisibleTextState::default();
        let _ = state.push("Hello world", true);
        state.clear();
        let (visible, delta) = state.push("fresh", true);
        assert_eq!(visible, "fresh");
        assert_eq!(delta, "fresh");
    }

    /// Inline JSON is removed only when it starts with a planner `mode`.
    #[test]
    fn sanitize_drops_inline_planner_json_only_with_planner_mode() {
        let raw = r#"{"mode":"plan_then_execute","plan":[]}"#;
        assert_eq!(sanitize_visible_assistant_text(raw, false), "");
        let raw = r#"{"status":"ok","message":"hello"}"#;
        assert_eq!(sanitize_visible_assistant_text(raw, false), raw);
    }

    /// When a `<user_response>` block exists, nothing outside it is shown.
    #[test]
    fn sanitize_prefers_user_response_blocks_over_other_prose() {
        let raw = "Working...\n<assistant_prose>internal narration</assistant_prose>\n<user_response>Visible answer.</user_response>\n##DONE##";
        assert_eq!(
            sanitize_visible_assistant_text(raw, false),
            "Visible answer."
        );
    }
}