Skip to main content

harn_vm/
visible_text.rs

1use std::collections::BTreeSet;
2use std::sync::OnceLock;
3
4use regex::Regex;
5
6#[derive(Default, Clone, Debug, PartialEq, Eq)]
7pub struct VisibleTextState {
8    raw_text: String,
9    last_visible_text: String,
10}
11
12impl VisibleTextState {
13    pub fn push(&mut self, delta: &str, partial: bool) -> (String, String) {
14        self.raw_text.push_str(delta);
15        let visible_text = sanitize_visible_assistant_text(&self.raw_text, partial);
16        let visible_delta = visible_text
17            .strip_prefix(&self.last_visible_text)
18            .unwrap_or(visible_text.as_str())
19            .to_string();
20        self.last_visible_text = visible_text.clone();
21        (visible_text, visible_delta)
22    }
23
24    pub fn clear(&mut self) {
25        self.raw_text.clear();
26        self.last_visible_text.clear();
27    }
28}
29
30fn internal_block_patterns() -> &'static [Regex] {
31    static PATTERNS: OnceLock<Vec<Regex>> = OnceLock::new();
32    PATTERNS.get_or_init(|| {
33        [
34            r"(?s)<think>.*?</think>",
35            r"(?s)<think>.*$",
36            r"(?s)<\|tool_call\|>.*?</\|tool_call\|>",
37            // Tagged response protocol: hide tool-call bodies (executed as
38            // structured data, never surfaced as narration) and done
39            // blocks (runtime signal, not user-facing).
40            r"(?s)<tool_call>.*?</tool_call>",
41            r"(?s)<done>.*?</done>",
42            r"(?s)<tool_result[^>]*>.*?</tool_result>",
43            r"(?s)\[result of [^\]]+\].*?\[end of [^\]]+\]",
44            r"(?m)^\s*(##DONE##|DONE|PLAN_READY)\s*$",
45        ]
46        .into_iter()
47        .map(|pattern| Regex::new(pattern).expect("valid assistant sanitization regex"))
48        .collect()
49    })
50}
51
52/// Strip the wrapper tags around `<assistant_prose>` blocks so the
53/// surfaced visible text reads as plain narration. Matched tags that
54/// are unclosed (model still streaming) are held back until the next
55/// chunk resolves them.
56fn unwrap_assistant_prose(text: &str) -> String {
57    static RE: OnceLock<Regex> = OnceLock::new();
58    let re = RE.get_or_init(|| {
59        Regex::new(r"(?s)<assistant_prose>\s*(.*?)\s*</assistant_prose>")
60            .expect("valid assistant_prose regex")
61    });
62    re.replace_all(text, "$1").to_string()
63}
64
65fn json_fence_regex() -> &'static Regex {
66    static JSON_FENCE: OnceLock<Regex> = OnceLock::new();
67    JSON_FENCE
68        .get_or_init(|| Regex::new(r"(?s)```json[^\n]*\n(.*?)```").expect("valid json fence regex"))
69}
70
71fn inline_planner_json_regex() -> &'static Regex {
72    static INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
73    INLINE_PLANNER_JSON.get_or_init(|| {
74        Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*?\}"#)
75            .expect("valid inline planner json regex")
76    })
77}
78
79fn partial_inline_planner_json_regex() -> &'static Regex {
80    static PARTIAL_INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
81    PARTIAL_INLINE_PLANNER_JSON.get_or_init(|| {
82        Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*$"#)
83            .expect("valid partial inline planner json regex")
84    })
85}
86
87fn looks_like_internal_planning_json(source: &str) -> bool {
88    let trimmed = source.trim();
89    if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
90        return false;
91    }
92
93    fn collect_keys(value: &serde_json::Value, keys: &mut BTreeSet<String>) {
94        match value {
95            serde_json::Value::Object(map) => {
96                for (key, child) in map {
97                    keys.insert(key.clone());
98                    collect_keys(child, keys);
99                }
100            }
101            serde_json::Value::Array(items) => {
102                for item in items {
103                    collect_keys(item, keys);
104                }
105            }
106            _ => {}
107        }
108    }
109
110    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(trimmed) {
111        let mut keys = BTreeSet::new();
112        collect_keys(&parsed, &mut keys);
113        let has_planner_mode = match &parsed {
114            serde_json::Value::Object(map) => map
115                .get("mode")
116                .and_then(|value| value.as_str())
117                .is_some_and(|mode| {
118                    matches!(mode, "fast_execute" | "plan_then_execute" | "ask_user")
119                }),
120            _ => false,
121        };
122        let has_internal_keys = [
123            "plan",
124            "steps",
125            "tool_calls",
126            "tool_name",
127            "verification",
128            "execution_mode",
129            "required_outputs",
130            "files_to_edit",
131            "next_action",
132            "reasoning",
133            "direction",
134            "targets",
135            "tasks",
136            "unknowns",
137        ]
138        .into_iter()
139        .any(|key| keys.contains(key));
140        return has_planner_mode || has_internal_keys;
141    }
142
143    false
144}
145
146fn strip_internal_json_fences(text: &str) -> String {
147    json_fence_regex()
148        .replace_all(text, |caps: &regex::Captures| {
149            let body = caps
150                .get(1)
151                .map(|match_| match_.as_str())
152                .unwrap_or_default();
153            if looks_like_internal_planning_json(body) {
154                String::new()
155            } else {
156                caps.get(0)
157                    .map(|match_| match_.as_str().to_string())
158                    .unwrap_or_default()
159            }
160        })
161        .to_string()
162}
163
/// Truncates `text` at the start of the earliest internal block that has
/// been opened but not yet closed.
///
/// For each delimiter pair, the last occurrence of the opener counts as
/// unclosed when no closer appears after it. The text is cut at the
/// smallest such opener offset across all pairs, so nothing that follows an
/// unclosed opener of any kind is returned. If every block is closed, or
/// none is present, the text is returned unchanged.
///
/// Cutting at the minimum offset matters when several kinds are open at
/// once, e.g. `"a <done> x <tool_call> y"`: stopping at the first matching
/// kind would return `"a <done> x "` and expose the unclosed `<done>` body.
fn strip_unclosed_internal_blocks(text: &str) -> String {
    const BLOCK_DELIMITERS: [(&str, &str); 5] = [
        ("<|tool_call|>", "</|tool_call|>"),
        ("<tool_call>", "</tool_call>"),
        ("<done>", "</done>"),
        ("[result of ", "[end of "),
        ("<tool_result", "</tool_result>"),
    ];

    let earliest_unclosed = BLOCK_DELIMITERS
        .iter()
        .filter_map(|&(opener, closer)| {
            let open_idx = text.rfind(opener)?;
            match text.rfind(closer) {
                // A closer after the last opener means this kind is closed.
                Some(close_idx) if close_idx > open_idx => None,
                _ => Some(open_idx),
            }
        })
        .min();

    match earliest_unclosed {
        // `rfind` offsets are always char boundaries, so slicing is safe.
        Some(cut) => text[..cut].to_string(),
        None => text.to_string(),
    }
}
202
203fn strip_inline_internal_planning_json(text: &str, partial: bool) -> String {
204    let mut stripped = inline_planner_json_regex()
205        .replace_all(text, "")
206        .to_string();
207    if partial {
208        stripped = partial_inline_planner_json_regex()
209            .replace_all(&stripped, "")
210            .to_string();
211    }
212    stripped
213}
214
/// Removes a trailing fragment of `text` that could be the beginning of an
/// internal marker still being streamed.
///
/// Every proper prefix (length `1..marker.len()`) of every marker is a
/// candidate, and the longest candidate that `text` ends with is removed.
/// Choosing the longest match across all markers matters when one marker's
/// prefix ends with another's: `"PLAN_READ"` ends with `"D"`, a prefix of
/// `"DONE"`, and must be removed as a whole rather than losing only `D`.
///
/// Single-character prefixes such as `<`, `#`, `D` and `P` are candidates
/// too, so text ending in one of them loses that character. Complete
/// markers are not handled here. Text with no matching fragment is returned
/// unchanged.
fn strip_partial_marker_suffix(text: &str) -> String {
    const MARKERS: [&str; 9] = [
        "<|tool_call|>",
        "<tool_call>",
        "<assistant_prose>",
        "<done>",
        "<tool_result",
        "[result of ",
        "##DONE##",
        "DONE",
        "PLAN_READY",
    ];

    let longest_fragment = MARKERS
        .iter()
        .copied()
        .flat_map(|marker| (1..marker.len()).map(move |len| &marker[..len]))
        .filter(|fragment| text.ends_with(*fragment))
        .map(|fragment| fragment.len())
        .max();

    match longest_fragment {
        // Markers are ASCII, so the cut lands on a char boundary.
        Some(len) => text[..text.len() - len].to_string(),
        None => text.to_string(),
    }
}
237
/// Normalizes line endings and blank-line runs in visible text.
///
/// * `\r\n` becomes `\n`.
/// * Any run of three or more consecutive `\n` is reduced to exactly two,
///   i.e. at most one blank line between paragraphs.
/// * Leading and trailing whitespace is trimmed.
///
/// A single `replace("\n\n\n", "\n\n")` pass is not enough here: it turns
/// four newlines into three and six into four. Capping each run while
/// scanning makes the result independent of the run length and idempotent.
fn normalize_visible_whitespace(text: &str) -> String {
    let unified = text.replace("\r\n", "\n");

    let mut collapsed = String::with_capacity(unified.len());
    let mut newline_run = 0usize;
    for ch in unified.chars() {
        if ch == '\n' {
            newline_run += 1;
            if newline_run > 2 {
                // Third or later newline in a row: drop it.
                continue;
            }
        } else {
            newline_run = 0;
        }
        collapsed.push(ch);
    }

    collapsed.trim().to_string()
}
244
245pub fn sanitize_visible_assistant_text(text: &str, partial: bool) -> String {
246    let mut sanitized = text.to_string();
247    for pattern in internal_block_patterns() {
248        sanitized = pattern.replace_all(&sanitized, "").to_string();
249    }
250    // After runtime tags are stripped, unwrap the <assistant_prose> wrapper
251    // so the user-visible stream reads as plain narration.
252    sanitized = unwrap_assistant_prose(&sanitized);
253    sanitized = strip_internal_json_fences(&sanitized);
254    sanitized = strip_inline_internal_planning_json(&sanitized, partial);
255    if partial {
256        sanitized = strip_unclosed_internal_blocks(&sanitized);
257        sanitized = strip_partial_marker_suffix(&sanitized);
258    }
259    normalize_visible_whitespace(&sanitized)
260}
261
#[cfg(test)]
mod tests {
    use super::{sanitize_visible_assistant_text, VisibleTextState};

    /// Builds the `(visible, delta)` pair that `push` is expected to return.
    fn expect(visible: &str, delta: &str) -> (String, String) {
        (visible.to_owned(), delta.to_owned())
    }

    #[test]
    fn push_emits_incremental_visible_delta_for_plain_chunks() {
        let mut state = VisibleTextState::default();
        assert_eq!(state.push("Hello", true), expect("Hello", "Hello"));
        assert_eq!(state.push(" world", true), expect("Hello world", " world"));
    }

    #[test]
    fn push_hides_open_think_block_until_closed() {
        let mut state = VisibleTextState::default();
        assert_eq!(state.push("Hi <think>secret", true), expect("Hi", "Hi"));
        assert_eq!(
            state.push(" plan</think> bye", true),
            expect("Hi  bye", "  bye")
        );
    }

    #[test]
    fn push_emits_full_visible_text_when_sanitization_shrinks_output() {
        let mut state = VisibleTextState::default();
        assert_eq!(state.push("ok", true).0, "ok");

        // The visible text is unchanged, so nothing new is emitted.
        assert_eq!(state.push(" <think>", true), expect("ok", ""));
    }

    #[test]
    fn push_partial_marker_suffix_is_held_back_until_resolved() {
        let mut state = VisibleTextState::default();
        assert_eq!(state.push("Hello\n##DON", true), expect("Hello", "Hello"));
        assert_eq!(
            state.push("E##\nmore", true),
            expect("Hello\n\nmore", "\n\nmore")
        );
    }

    #[test]
    fn clear_resets_streaming_state() {
        let mut state = VisibleTextState::default();
        let _ = state.push("Hello world", true);
        state.clear();
        assert_eq!(state.push("fresh", true), expect("fresh", "fresh"));
    }

    #[test]
    fn sanitize_drops_inline_planner_json_only_with_planner_mode() {
        let planner = r#"{"mode":"plan_then_execute","plan":[]}"#;
        assert_eq!(sanitize_visible_assistant_text(planner, false), "");

        let ordinary = r#"{"status":"ok","message":"hello"}"#;
        assert_eq!(sanitize_visible_assistant_text(ordinary, false), ordinary);
    }
}