Skip to main content

zeph_context/
microcompact.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pure helpers for time-based microcompact (#2699).
5//!
6//! The `Agent`-level integration (reading `self.*`, mutating message history)
7//! lives in `zeph-core`. This module contains only the stateless helpers.
8
9use zeph_llm::provider::{Message, MessagePart};
10
/// Names of tools whose output is considered low-value after a session gap.
///
/// Entries are stored in lowercase. [`is_low_value_tool`] is the helper that
/// matches a candidate tool name against this list case-insensitively.
pub const LOW_VALUE_TOOLS: &[&str] = &[
    "bash",
    "shell",
    "grep",
    "rg",
    "ripgrep",
    "glob",
    "find",
    "web_fetch",
    "fetch",
    "web_search",
    "search",
    "read",
    "cat",
    "list_directory",
];
30
/// Prefix that marks a tool output or tool result as already cleared.
///
/// [`sweep_stale_tool_outputs`] skips every part whose text starts with this
/// prefix, so sentinel strings handed to it should begin with `[cleared`.
/// `ToolResult` parts carry no other "already cleared" marker, so a sentinel
/// without this prefix would be picked up again by a later sweep.
pub const CLEARED_SENTINEL_PREFIX: &str = "[cleared";
35
36/// Returns the tool name from the closest preceding `ToolUse` part, if any.
37///
38/// Walks backward from `result_idx - 1` looking for a `ToolUse` variant.
39#[must_use]
40pub fn find_preceding_tool_use_name(parts: &[MessagePart], result_idx: usize) -> Option<&str> {
41    for part in parts[..result_idx].iter().rev() {
42        if let MessagePart::ToolUse { name, .. } = part {
43            return Some(name.as_str());
44        }
45    }
46    None
47}
48
49/// Returns `true` if `tool_name` (case-insensitive) is in the low-value set.
50#[must_use]
51pub fn is_low_value_tool(tool_name: &str) -> bool {
52    let lower = tool_name.to_lowercase();
53    LOW_VALUE_TOOLS.contains(&lower.as_str())
54}
55
/// Identifies which part of a message should be compacted.
///
/// Each variant wraps the part's index within the message's `parts` list and
/// records which kind of part sits at that index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactTarget {
    /// A `ToolOutput` part at the given index.
    Output(usize),
    /// A `ToolResult` part at the given index.
    Result(usize),
}
64
65/// Sweep stale low-value tool outputs from the message list.
66///
67/// Clears all but the most recent `keep_recent` compactable outputs, replacing their
68/// content with `sentinel`. The `now_ts` parameter is the current Unix timestamp
69/// (seconds) used to mark `compacted_at` on `ToolOutput` parts.
70///
71/// Returns the number of cleared entries.
72pub fn sweep_stale_tool_outputs(
73    messages: &mut [Message],
74    keep_recent: usize,
75    sentinel: &str,
76    now_ts: i64,
77) -> usize {
78    let mut compactable: Vec<(usize, CompactTarget)> = Vec::new();
79
80    for (msg_idx, msg) in messages.iter().enumerate() {
81        for (part_idx, part) in msg.parts.iter().enumerate() {
82            match part {
83                MessagePart::ToolOutput {
84                    tool_name,
85                    body,
86                    compacted_at,
87                    ..
88                } => {
89                    if compacted_at.is_some()
90                        || body.starts_with(CLEARED_SENTINEL_PREFIX)
91                        || !is_low_value_tool(tool_name.as_str())
92                    {
93                        continue;
94                    }
95                    compactable.push((msg_idx, CompactTarget::Output(part_idx)));
96                }
97                MessagePart::ToolResult { content, .. } => {
98                    if content.starts_with(CLEARED_SENTINEL_PREFIX) {
99                        continue;
100                    }
101                    let tool_name = find_preceding_tool_use_name(&msg.parts, part_idx);
102                    if let Some(name) = tool_name
103                        && is_low_value_tool(name)
104                    {
105                        compactable.push((msg_idx, CompactTarget::Result(part_idx)));
106                    }
107                }
108                _ => {}
109            }
110        }
111    }
112
113    let total = compactable.len();
114    if total == 0 {
115        return 0;
116    }
117
118    let clear_count = total.saturating_sub(keep_recent);
119    if clear_count == 0 {
120        return 0;
121    }
122
123    for (msg_idx, target) in &compactable[..clear_count] {
124        let msg = &mut messages[*msg_idx];
125        match target {
126            CompactTarget::Output(part_idx) => {
127                if let MessagePart::ToolOutput {
128                    body, compacted_at, ..
129                } = &mut msg.parts[*part_idx]
130                {
131                    body.clone_from(&sentinel.to_string());
132                    *compacted_at = Some(now_ts);
133                }
134            }
135            CompactTarget::Result(part_idx) => {
136                if let MessagePart::ToolResult { content, .. } = &mut msg.parts[*part_idx] {
137                    content.clone_from(&sentinel.to_string());
138                }
139            }
140        }
141    }
142
143    clear_count
144}
145
#[cfg(test)]
mod tests {
    //! Unit tests for the microcompact helpers.
    //!
    //! Part contents are read through [`output_state`] and [`result_content`],
    //! which panic on an unexpected variant, so an assertion can never be
    //! skipped silently because a pattern failed to match.

    use super::*;

    /// Builds a user message holding a single `ToolOutput` part.
    fn tool_output_msg(tool_name: &str, body: &str) -> Message {
        use zeph_llm::provider::{MessageMetadata, Role};
        Message {
            role: Role::User,
            content: body.to_string(),
            parts: vec![MessagePart::ToolOutput {
                tool_name: tool_name.into(),
                body: body.into(),
                compacted_at: None,
            }],
            metadata: MessageMetadata::default(),
        }
    }

    /// Builds a user message holding a `ToolUse` part followed by its `ToolResult`.
    fn tool_result_msg(tool_name: &str, content: &str) -> Message {
        use zeph_llm::provider::{MessageMetadata, Role};
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![
                MessagePart::ToolUse {
                    id: "id".into(),
                    name: tool_name.into(),
                    input: serde_json::Value::Null,
                },
                MessagePart::ToolResult {
                    tool_use_id: "id".into(),
                    content: content.into(),
                    is_error: false,
                },
            ],
            metadata: MessageMetadata::default(),
        }
    }

    /// Returns `(body, compacted_at)` of `parts[0]`, which must be a `ToolOutput`.
    fn output_state(msg: &Message) -> (&str, Option<i64>) {
        match &msg.parts[0] {
            MessagePart::ToolOutput {
                body, compacted_at, ..
            } => (body.as_str(), *compacted_at),
            _ => panic!("expected parts[0] to be a ToolOutput"),
        }
    }

    /// Returns the content of `parts[1]`, which must be a `ToolResult`.
    fn result_content(msg: &Message) -> &str {
        match &msg.parts[1] {
            MessagePart::ToolResult { content, .. } => content.as_str(),
            _ => panic!("expected parts[1] to be a ToolResult"),
        }
    }

    #[test]
    fn low_value_tool_detection_case_insensitive() {
        assert!(is_low_value_tool("Bash"));
        assert!(is_low_value_tool("GREP"));
        assert!(is_low_value_tool("list_directory"));
        assert!(!is_low_value_tool("file_edit"));
        assert!(!is_low_value_tool("memory_save"));
        assert!(!is_low_value_tool("mcp_tool"));
    }

    #[test]
    fn find_preceding_tool_use_name_returns_closest() {
        let parts = vec![
            MessagePart::ToolUse {
                id: "1".into(),
                name: "bash".into(),
                input: serde_json::Value::Null,
            },
            MessagePart::ToolResult {
                tool_use_id: "1".into(),
                content: "output".into(),
                is_error: false,
            },
        ];
        assert_eq!(find_preceding_tool_use_name(&parts, 1), Some("bash"));
    }

    #[test]
    fn find_preceding_tool_use_name_prefers_nearest_of_several() {
        let parts = vec![
            MessagePart::ToolUse {
                id: "1".into(),
                name: "bash".into(),
                input: serde_json::Value::Null,
            },
            MessagePart::ToolUse {
                id: "2".into(),
                name: "grep".into(),
                input: serde_json::Value::Null,
            },
            MessagePart::ToolResult {
                tool_use_id: "2".into(),
                content: "output".into(),
                is_error: false,
            },
        ];
        assert_eq!(find_preceding_tool_use_name(&parts, 2), Some("grep"));
        assert_eq!(find_preceding_tool_use_name(&parts, 1), Some("bash"));
    }

    #[test]
    fn find_preceding_tool_use_name_no_match() {
        let parts = vec![MessagePart::ToolResult {
            tool_use_id: "1".into(),
            content: "output".into(),
            is_error: false,
        }];
        assert!(find_preceding_tool_use_name(&parts, 0).is_none());
    }

    #[test]
    fn sweep_clears_all_when_keep_recent_zero() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
            tool_output_msg("shell", "output3"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 3);
        for msg in &messages {
            assert_eq!(output_state(msg), ("[cleared]", Some(1000)));
        }
    }

    #[test]
    fn sweep_preserves_keep_recent_most_recent() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
            tool_output_msg("shell", "output3"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 2, "[cleared]", 1000);
        // 3 total - 2 keep_recent = 1 cleared
        assert_eq!(cleared, 1);
        // Oldest entry is cleared and stamped.
        assert_eq!(output_state(&messages[0]), ("[cleared]", Some(1000)));
        // The two most recent entries are left untouched.
        assert_eq!(output_state(&messages[1]), ("output2", None));
        assert_eq!(output_state(&messages[2]), ("output3", None));
    }

    #[test]
    fn sweep_clears_nothing_when_keep_recent_covers_all() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
        ];
        assert_eq!(
            sweep_stale_tool_outputs(&mut messages, 2, "[cleared]", 1000),
            0
        );
        assert_eq!(
            sweep_stale_tool_outputs(&mut messages, 5, "[cleared]", 1000),
            0
        );
        assert_eq!(output_state(&messages[0]), ("output1", None));
        assert_eq!(output_state(&messages[1]), ("output2", None));
    }

    #[test]
    fn sweep_is_idempotent_on_already_cleared() {
        let mut messages = vec![
            tool_output_msg("bash", "[cleared — stale]"),
            tool_output_msg("grep", "output2"),
        ];
        // First message already cleared — only 1 compactable, keep_recent=0 → clear 1
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 1);
        // The already-cleared message was skipped, so it is unchanged.
        assert_eq!(output_state(&messages[0]), ("[cleared — stale]", None));
        // The second message is now cleared.
        assert_eq!(output_state(&messages[1]), ("[cleared]", Some(1000)));
    }

    #[test]
    fn sweep_skips_high_value_tools() {
        let mut messages = vec![
            tool_output_msg("file_edit", "important"),
            tool_output_msg("bash", "output"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        // file_edit is high-value, only bash is compactable
        assert_eq!(cleared, 1);
        assert_eq!(
            output_state(&messages[0]),
            ("important", None),
            "high-value tool output must be preserved"
        );
        assert_eq!(output_state(&messages[1]), ("[cleared]", Some(1000)));
    }

    #[test]
    fn sweep_clears_tool_result_parts() {
        let mut messages = vec![
            tool_result_msg("bash", "result1"),
            tool_result_msg("grep", "result2"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 2);
        for msg in &messages {
            assert_eq!(result_content(msg), "[cleared]");
        }
    }

    #[test]
    fn sweep_skips_tool_results_of_high_value_tools() {
        let mut messages = vec![
            tool_result_msg("file_edit", "result1"),
            tool_result_msg("bash", "result2"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 1);
        assert_eq!(result_content(&messages[0]), "result1");
        assert_eq!(result_content(&messages[1]), "[cleared]");
    }
}