Skip to main content

zeph_context/
microcompact.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Pure helpers for time-based microcompact (#2699).
//!
//! The `Agent`-level integration (reading `self.*`, mutating message history)
//! lives in `zeph-core`. This module contains only the stateless helpers.
8
9use zeph_llm::provider::{Message, MessagePart};
10
/// Names of tools whose output is treated as low-value once a session gap has passed.
///
/// Every entry is spelled in lowercase; callers are expected to compare
/// case-insensitively (see `is_low_value_tool`).
///
/// The grouping comments below are inferred from the names only.
pub const LOW_VALUE_TOOLS: &[&str] = &[
    // command execution
    "bash",
    "shell",
    // text search
    "grep",
    "rg",
    "ripgrep",
    // file discovery
    "glob",
    "find",
    // web access
    "web_fetch",
    "fetch",
    "web_search",
    "search",
    // file and directory reads
    "read",
    "cat",
    "list_directory",
];
30
/// Leading marker shared by every sentinel written into a cleared tool output.
///
/// A part whose text starts with this prefix is recognised as already cleared,
/// which lets reload detection and repeated sweeps skip it.
pub const CLEARED_SENTINEL_PREFIX: &str = "[cleared";
35
36/// Returns the tool name from the closest preceding `ToolUse` part, if any.
37///
38/// Walks backward from `result_idx - 1` looking for a `ToolUse` variant.
39#[must_use]
40pub fn find_preceding_tool_use_name(parts: &[MessagePart], result_idx: usize) -> Option<&str> {
41    for part in parts[..result_idx].iter().rev() {
42        if let MessagePart::ToolUse { name, .. } = part {
43            return Some(name.as_str());
44        }
45    }
46    None
47}
48
49/// Returns `true` if `tool_name` (case-insensitive) is in the low-value set.
50#[must_use]
51pub fn is_low_value_tool(tool_name: &str) -> bool {
52    let lower = tool_name.to_lowercase();
53    LOW_VALUE_TOOLS.contains(&lower.as_str())
54}
55
/// Index into a message's parts list identifying which part to compact.
///
/// Each variant carries the position of the part inside `Message::parts` and
/// records which kind of part is expected at that position. The type is a plain
/// index wrapper, so it is cheap to copy and can be compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum CompactTarget {
    /// A `ToolOutput` part at the given index.
    Output(usize),
    /// A `ToolResult` part at the given index.
    Result(usize),
}
65
66/// Sweep stale low-value tool outputs from the message list.
67///
68/// Clears all but the most recent `keep_recent` compactable outputs, replacing their
69/// content with `sentinel`. The `now_ts` parameter is the current Unix timestamp
70/// (seconds) used to mark `compacted_at` on `ToolOutput` parts.
71///
72/// Returns the number of cleared entries.
73pub fn sweep_stale_tool_outputs(
74    messages: &mut [Message],
75    keep_recent: usize,
76    sentinel: &str,
77    now_ts: i64,
78) -> usize {
79    let mut compactable: Vec<(usize, CompactTarget)> = Vec::new();
80
81    for (msg_idx, msg) in messages.iter().enumerate() {
82        for (part_idx, part) in msg.parts.iter().enumerate() {
83            match part {
84                MessagePart::ToolOutput {
85                    tool_name,
86                    body,
87                    compacted_at,
88                    ..
89                } => {
90                    if compacted_at.is_some()
91                        || body.starts_with(CLEARED_SENTINEL_PREFIX)
92                        || !is_low_value_tool(tool_name.as_str())
93                    {
94                        continue;
95                    }
96                    compactable.push((msg_idx, CompactTarget::Output(part_idx)));
97                }
98                MessagePart::ToolResult { content, .. } => {
99                    if content.starts_with(CLEARED_SENTINEL_PREFIX) {
100                        continue;
101                    }
102                    let tool_name = find_preceding_tool_use_name(&msg.parts, part_idx);
103                    if let Some(name) = tool_name
104                        && is_low_value_tool(name)
105                    {
106                        compactable.push((msg_idx, CompactTarget::Result(part_idx)));
107                    }
108                }
109                _ => {}
110            }
111        }
112    }
113
114    let total = compactable.len();
115    if total == 0 {
116        return 0;
117    }
118
119    let clear_count = total.saturating_sub(keep_recent);
120    if clear_count == 0 {
121        return 0;
122    }
123
124    for (msg_idx, target) in &compactable[..clear_count] {
125        let msg = &mut messages[*msg_idx];
126        match target {
127            CompactTarget::Output(part_idx) => {
128                if let MessagePart::ToolOutput {
129                    body, compacted_at, ..
130                } = &mut msg.parts[*part_idx]
131                {
132                    body.clone_from(&sentinel.to_string());
133                    *compacted_at = Some(now_ts);
134                }
135            }
136            CompactTarget::Result(part_idx) => {
137                if let MessagePart::ToolResult { content, .. } = &mut msg.parts[*part_idx] {
138                    content.clone_from(&sentinel.to_string());
139                }
140            }
141        }
142    }
143
144    clear_count
145}
146
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{MessageMetadata, Role};

    /// Builds a user message whose only part is a `ToolOutput` for `tool_name`.
    fn tool_output_msg(tool_name: &str, body: &str) -> Message {
        Message {
            role: Role::User,
            content: body.to_string(),
            parts: vec![MessagePart::ToolOutput {
                tool_name: tool_name.into(),
                body: body.into(),
                compacted_at: None,
            }],
            metadata: MessageMetadata::default(),
        }
    }

    /// Builds a user message holding a `ToolUse` for `tool_name` followed by its `ToolResult`.
    fn tool_result_msg(tool_name: &str, content: &str) -> Message {
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![
                MessagePart::ToolUse {
                    id: "id".into(),
                    name: tool_name.into(),
                    input: serde_json::Value::Null,
                },
                MessagePart::ToolResult {
                    tool_use_id: "id".into(),
                    content: content.into(),
                    is_error: false,
                },
            ],
            metadata: MessageMetadata::default(),
        }
    }

    /// Returns `(body, compacted_at)` of the `ToolOutput` at part 0.
    ///
    /// Panics on any other variant so an unexpected message shape fails the test
    /// instead of silently skipping its assertions.
    #[track_caller]
    fn output_state(msg: &Message) -> (&str, Option<i64>) {
        match &msg.parts[0] {
            MessagePart::ToolOutput {
                body, compacted_at, ..
            } => (body.as_str(), *compacted_at),
            _ => panic!("expected a ToolOutput as the first part"),
        }
    }

    /// Returns the content of the `ToolResult` at part 1; panics on any other variant.
    #[track_caller]
    fn result_content(msg: &Message) -> &str {
        match &msg.parts[1] {
            MessagePart::ToolResult { content, .. } => content.as_str(),
            _ => panic!("expected a ToolResult as the second part"),
        }
    }

    #[test]
    fn low_value_tool_detection_case_insensitive() {
        assert!(is_low_value_tool("Bash"));
        assert!(is_low_value_tool("GREP"));
        assert!(is_low_value_tool("list_directory"));
        assert!(!is_low_value_tool("file_edit"));
        assert!(!is_low_value_tool("memory_save"));
        assert!(!is_low_value_tool("mcp_tool"));
    }

    #[test]
    fn find_preceding_tool_use_name_returns_closest() {
        let parts = vec![
            MessagePart::ToolUse {
                id: "1".into(),
                name: "bash".into(),
                input: serde_json::Value::Null,
            },
            MessagePart::ToolResult {
                tool_use_id: "1".into(),
                content: "output".into(),
                is_error: false,
            },
        ];
        let name = find_preceding_tool_use_name(&parts, 1);
        assert_eq!(name, Some("bash"));
    }

    #[test]
    fn find_preceding_tool_use_name_no_match() {
        let parts = vec![MessagePart::ToolResult {
            tool_use_id: "1".into(),
            content: "output".into(),
            is_error: false,
        }];
        let name = find_preceding_tool_use_name(&parts, 0);
        assert!(name.is_none());
    }

    #[test]
    fn sweep_clears_all_when_keep_recent_zero() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
            tool_output_msg("shell", "output3"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 3);
        for msg in &messages {
            assert_eq!(output_state(msg), ("[cleared]", Some(1000)));
        }
    }

    #[test]
    fn sweep_preserves_keep_recent_most_recent() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
            tool_output_msg("shell", "output3"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 2, "[cleared]", 1000);
        // 3 total - 2 keep_recent = 1 cleared
        assert_eq!(cleared, 1);
        // first message cleared and stamped
        assert_eq!(output_state(&messages[0]), ("[cleared]", Some(1000)));
        // last two preserved and not stamped
        assert_eq!(output_state(&messages[1]), ("output2", None));
        assert_eq!(output_state(&messages[2]), ("output3", None));
    }

    #[test]
    fn sweep_returns_zero_when_keep_recent_covers_all() {
        let mut messages = vec![
            tool_output_msg("bash", "output1"),
            tool_output_msg("grep", "output2"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 5, "[cleared]", 1000);
        assert_eq!(cleared, 0);
        assert_eq!(output_state(&messages[0]), ("output1", None));
        assert_eq!(output_state(&messages[1]), ("output2", None));
    }

    #[test]
    fn sweep_is_idempotent_on_already_cleared() {
        let mut messages = vec![
            tool_output_msg("bash", "[cleared — stale]"),
            tool_output_msg("grep", "output2"),
        ];
        // First message already cleared — only 1 compactable, keep_recent=0 → clear 1
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 1);
        // Already-cleared message must be left exactly as it was (it was skipped)
        assert_eq!(output_state(&messages[0]), ("[cleared — stale]", None));
        // Second message should now be cleared
        assert_eq!(output_state(&messages[1]), ("[cleared]", Some(1000)));
    }

    #[test]
    fn sweep_skips_high_value_tools() {
        let mut messages = vec![
            tool_output_msg("file_edit", "important"),
            tool_output_msg("bash", "output"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        // file_edit is high-value, only bash is compactable
        assert_eq!(cleared, 1);
        assert_eq!(
            output_state(&messages[0]),
            ("important", None),
            "high-value tool output must be preserved"
        );
        assert_eq!(output_state(&messages[1]), ("[cleared]", Some(1000)));
    }

    #[test]
    fn sweep_clears_tool_result_parts() {
        let mut messages = vec![
            tool_result_msg("bash", "result1"),
            tool_result_msg("grep", "result2"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 0, "[cleared]", 1000);
        assert_eq!(cleared, 2);
        for msg in &messages {
            assert_eq!(result_content(msg), "[cleared]");
        }
    }

    #[test]
    fn sweep_keeps_most_recent_tool_result() {
        let mut messages = vec![
            tool_result_msg("bash", "result1"),
            tool_result_msg("grep", "result2"),
        ];
        let cleared = sweep_stale_tool_outputs(&mut messages, 1, "[cleared]", 1000);
        assert_eq!(cleared, 1);
        assert_eq!(result_content(&messages[0]), "[cleared]");
        assert_eq!(result_content(&messages[1]), "result2");
    }
}
321}