Skip to main content

harn_vm/
visible_text.rs

1use std::collections::BTreeSet;
2use std::sync::OnceLock;
3
4use regex::Regex;
5
6#[derive(Default, Clone, Debug, PartialEq, Eq)]
7pub struct VisibleTextState {
8    raw_text: String,
9    last_visible_text: String,
10}
11
12impl VisibleTextState {
13    pub fn push(&mut self, delta: &str, partial: bool) -> (String, String) {
14        self.raw_text.push_str(delta);
15        let visible_text = sanitize_visible_assistant_text(&self.raw_text, partial);
16        let visible_delta = visible_text
17            .strip_prefix(&self.last_visible_text)
18            .unwrap_or(visible_text.as_str())
19            .to_string();
20        self.last_visible_text = visible_text.clone();
21        (visible_text, visible_delta)
22    }
23
24    pub fn clear(&mut self) {
25        self.raw_text.clear();
26        self.last_visible_text.clear();
27    }
28}
29
30fn internal_block_patterns() -> &'static [Regex] {
31    static PATTERNS: OnceLock<Vec<Regex>> = OnceLock::new();
32    PATTERNS.get_or_init(|| {
33        [
34            r"(?s)<think>.*?</think>",
35            r"(?s)<think>.*$",
36            r"(?s)<\|tool_call\|>.*?</\|tool_call\|>",
37            r"(?s)<tool_result[^>]*>.*?</tool_result>",
38            r"(?s)\[result of [^\]]+\].*?\[end of [^\]]+\]",
39            r"(?m)^\s*(##DONE##|DONE|PLAN_READY)\s*$",
40        ]
41        .into_iter()
42        .map(|pattern| Regex::new(pattern).expect("valid assistant sanitization regex"))
43        .collect()
44    })
45}
46
47fn json_fence_regex() -> &'static Regex {
48    static JSON_FENCE: OnceLock<Regex> = OnceLock::new();
49    JSON_FENCE
50        .get_or_init(|| Regex::new(r"(?s)```json[^\n]*\n(.*?)```").expect("valid json fence regex"))
51}
52
53fn inline_planner_json_regex() -> &'static Regex {
54    static INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
55    INLINE_PLANNER_JSON.get_or_init(|| {
56        Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*?\}"#)
57            .expect("valid inline planner json regex")
58    })
59}
60
61fn partial_inline_planner_json_regex() -> &'static Regex {
62    static PARTIAL_INLINE_PLANNER_JSON: OnceLock<Regex> = OnceLock::new();
63    PARTIAL_INLINE_PLANNER_JSON.get_or_init(|| {
64        Regex::new(r#"(?s)\{\s*"mode"\s*:\s*"(?:fast_execute|plan_then_execute|ask_user)".*$"#)
65            .expect("valid partial inline planner json regex")
66    })
67}
68
69fn looks_like_internal_planning_json(source: &str) -> bool {
70    let trimmed = source.trim();
71    if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
72        return false;
73    }
74
75    fn collect_keys(value: &serde_json::Value, keys: &mut BTreeSet<String>) {
76        match value {
77            serde_json::Value::Object(map) => {
78                for (key, child) in map {
79                    keys.insert(key.clone());
80                    collect_keys(child, keys);
81                }
82            }
83            serde_json::Value::Array(items) => {
84                for item in items {
85                    collect_keys(item, keys);
86                }
87            }
88            _ => {}
89        }
90    }
91
92    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(trimmed) {
93        let mut keys = BTreeSet::new();
94        collect_keys(&parsed, &mut keys);
95        let has_planner_mode = match &parsed {
96            serde_json::Value::Object(map) => map
97                .get("mode")
98                .and_then(|value| value.as_str())
99                .is_some_and(|mode| {
100                    matches!(mode, "fast_execute" | "plan_then_execute" | "ask_user")
101                }),
102            _ => false,
103        };
104        let has_internal_keys = [
105            "plan",
106            "steps",
107            "tool_calls",
108            "tool_name",
109            "verification",
110            "execution_mode",
111            "required_outputs",
112            "files_to_edit",
113            "next_action",
114            "reasoning",
115            "direction",
116            "targets",
117            "tasks",
118            "unknowns",
119        ]
120        .into_iter()
121        .any(|key| keys.contains(key));
122        return has_planner_mode || has_internal_keys;
123    }
124
125    false
126}
127
128fn strip_internal_json_fences(text: &str) -> String {
129    json_fence_regex()
130        .replace_all(text, |caps: &regex::Captures| {
131            let body = caps
132                .get(1)
133                .map(|match_| match_.as_str())
134                .unwrap_or_default();
135            if looks_like_internal_planning_json(body) {
136                String::new()
137            } else {
138                caps.get(0)
139                    .map(|match_| match_.as_str().to_string())
140                    .unwrap_or_default()
141            }
142        })
143        .to_string()
144}
145
/// Truncates `text` at the earliest internal block that has been opened but
/// not yet closed.
///
/// Three opener/closer pairs are recognized: `<|tool_call|>` / `</|tool_call|>`,
/// `[result of ` / `[end of `, and `<tool_result` / `</tool_result>`. For each
/// pair, an opener that appears after the last closer of that kind (or anywhere,
/// when there is no closer) is treated as unclosed and everything from it onward
/// is dropped.
///
/// Every pair is checked against the already-truncated text, so an unclosed
/// block of one kind that precedes an unclosed block of another kind is hidden
/// as well. The cut is made at the first unclosed opener of each kind, so
/// repeated unclosed openers cannot leave an earlier one visible.
fn strip_unclosed_internal_blocks(text: &str) -> String {
    const BLOCK_DELIMITERS: [(&str, &str); 3] = [
        ("<|tool_call|>", "</|tool_call|>"),
        ("[result of ", "[end of "),
        ("<tool_result", "</tool_result>"),
    ];

    let mut visible = text;
    for (open, close) in BLOCK_DELIMITERS {
        // Anything up to and including the last closer counts as closed.
        let search_from = visible
            .rfind(close)
            .map_or(0, |close_idx| close_idx + close.len());
        if let Some(offset) = visible[search_from..].find(open) {
            visible = &visible[..search_from + offset];
        }
    }
    visible.to_string()
}
170
171fn strip_inline_internal_planning_json(text: &str, partial: bool) -> String {
172    let mut stripped = inline_planner_json_regex()
173        .replace_all(text, "")
174        .to_string();
175    if partial {
176        stripped = partial_inline_planner_json_regex()
177            .replace_all(&stripped, "")
178            .to_string();
179    }
180    stripped
181}
182
183fn strip_partial_marker_suffix(text: &str) -> String {
184    const MARKERS: [&str; 6] = [
185        "<|tool_call|>",
186        "<tool_result",
187        "[result of ",
188        "##DONE##",
189        "DONE",
190        "PLAN_READY",
191    ];
192    for marker in MARKERS {
193        for len in (1..marker.len()).rev() {
194            let prefix = &marker[..len];
195            if let Some(stripped) = text.strip_suffix(prefix) {
196                return stripped.to_string();
197            }
198        }
199    }
200    text.to_string()
201}
202
/// Normalizes line endings and blank-line runs, then trims both ends.
///
/// `\r\n` becomes `\n`, every run of consecutive newlines is capped at two
/// (at most one blank line), and leading/trailing whitespace is removed.
///
/// The cap is applied in a single scan so that runs of any length collapse
/// fully; a one-shot `replace("\n\n\n", "\n\n")` would leave runs of four or
/// more newlines longer than two.
fn normalize_visible_whitespace(text: &str) -> String {
    let unix = text.replace("\r\n", "\n");
    let mut collapsed = String::with_capacity(unix.len());
    let mut newline_run = 0usize;
    for ch in unix.chars() {
        if ch == '\n' {
            newline_run += 1;
            if newline_run > 2 {
                // Third or later newline in a row: drop it.
                continue;
            }
        } else {
            newline_run = 0;
        }
        collapsed.push(ch);
    }
    collapsed.trim().to_string()
}
209
210pub fn sanitize_visible_assistant_text(text: &str, partial: bool) -> String {
211    let mut sanitized = text.to_string();
212    for pattern in internal_block_patterns() {
213        sanitized = pattern.replace_all(&sanitized, "").to_string();
214    }
215    sanitized = strip_internal_json_fences(&sanitized);
216    sanitized = strip_inline_internal_planning_json(&sanitized, partial);
217    if partial {
218        sanitized = strip_unclosed_internal_blocks(&sanitized);
219        sanitized = strip_partial_marker_suffix(&sanitized);
220    }
221    normalize_visible_whitespace(&sanitized)
222}
223
#[cfg(test)]
mod tests {
    use super::{sanitize_visible_assistant_text, VisibleTextState};

    /// Plain chunks pass through and each delta is just the newly added text.
    #[test]
    fn push_emits_incremental_visible_delta_for_plain_chunks() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hello", true);
        assert_eq!(visible, "Hello");
        assert_eq!(delta, "Hello");

        let (visible, delta) = state.push(" world", true);
        assert_eq!(visible, "Hello world");
        assert_eq!(delta, " world");
    }

    /// An open `<think>` block hides everything after it until it is closed.
    #[test]
    fn push_hides_open_think_block_until_closed() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hi <think>secret", true);
        assert_eq!(visible, "Hi");
        assert_eq!(delta, "Hi");

        let (visible, delta) = state.push(" plan</think> bye", true);
        assert_eq!(visible, "Hi  bye");
        assert_eq!(delta, "  bye");
    }

    /// A chunk that is entirely hidden leaves the visible text unchanged, so
    /// the delta is empty.
    #[test]
    fn push_emits_empty_delta_when_new_chunk_is_hidden() {
        let mut state = VisibleTextState::default();
        let (visible, _) = state.push("ok", true);
        assert_eq!(visible, "ok");

        let (visible, delta) = state.push(" <think>", true);
        assert_eq!(visible, "ok");
        // The visible text is unchanged, so nothing new follows the old prefix.
        assert_eq!(delta, "");
    }

    /// When later input causes already-shown text to be hidden, the new visible
    /// text no longer starts with the old one and the delta is the full text.
    #[test]
    fn push_emits_full_visible_text_when_sanitization_shrinks_output() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push(r#"A {"mo"#, true);
        assert_eq!(visible, r#"A {"mo"#);
        assert_eq!(delta, r#"A {"mo"#);

        // The object is now recognizable as planner JSON and is hidden from `{` on.
        let (visible, delta) = state.push(r#"de":"ask_user""#, true);
        assert_eq!(visible, "A");
        assert_eq!(delta, "A");
    }

    /// A trailing fragment of a marker is withheld until the marker resolves.
    #[test]
    fn push_partial_marker_suffix_is_held_back_until_resolved() {
        let mut state = VisibleTextState::default();
        let (visible, delta) = state.push("Hello\n##DON", true);
        assert_eq!(visible, "Hello");
        assert_eq!(delta, "Hello");

        let (visible, delta) = state.push("E##\nmore", true);
        assert_eq!(visible, "Hello\n\nmore");
        assert_eq!(delta, "\n\nmore");
    }

    /// `clear` drops both the raw buffer and the last visible snapshot.
    #[test]
    fn clear_resets_streaming_state() {
        let mut state = VisibleTextState::default();
        let _ = state.push("Hello world", true);
        state.clear();
        let (visible, delta) = state.push("fresh", true);
        assert_eq!(visible, "fresh");
        assert_eq!(delta, "fresh");
    }

    /// Inline JSON is removed only when it declares a planner mode.
    #[test]
    fn sanitize_drops_inline_planner_json_only_with_planner_mode() {
        let raw = r#"{"mode":"plan_then_execute","plan":[]}"#;
        assert_eq!(sanitize_visible_assistant_text(raw, false), "");
        let raw = r#"{"status":"ok","message":"hello"}"#;
        assert_eq!(sanitize_visible_assistant_text(raw, false), raw);
    }
}
289        assert_eq!(sanitize_visible_assistant_text(raw, false), "");
290        let raw = r#"{"status":"ok","message":"hello"}"#;
291        assert_eq!(sanitize_visible_assistant_text(raw, false), raw);
292    }
293}