Skip to main content

koda_core/
microcompact.rs

1//! Microcompact — lightweight tool result aging without full compaction.
2//!
3//! Replaces old tool result content with a stub (`[Old tool result content cleared]`)
4//! directly in the database. No LLM call, no API cost — just a SQL UPDATE.
5//!
6//! **Time-based trigger**: only fires when the gap since the last assistant
7//! message exceeds a threshold (default 5 minutes). This prevents aggressive
8//! clearing during active tool use and matches Claude Code's pattern where
9//! microcompact only runs when the prompt cache has gone cold.
10//!
11//! Inspired by Claude Code's `microCompact.ts`.
12
13use crate::context_analysis;
14use crate::db::Database;
15use crate::inference_helpers::CHARS_PER_TOKEN;
16use crate::persistence::{Message, Persistence, Role};
17use anyhow::Result;
18use std::collections::HashMap;
19
/// Stub text that replaces cleared tool results.
///
/// Also serves as the "already cleared" sentinel: a tool result whose content
/// is exactly equal to this string is skipped by `microcompact_session`, so
/// repeated passes do not count the same message twice.
pub const CLEARED_MESSAGE: &str = "[Old tool result content cleared]";
22
/// Tools whose results can be safely cleared (output is re-obtainable).
///
/// Names are matched exactly (case-sensitively) by `is_compactable`, which is
/// why each tool is listed under two spellings. Any tool name not listed here
/// is never cleared.
const COMPACTABLE_TOOLS: &[&str] = &[
    "Read",
    "read",
    "Bash",
    "bash",
    "Grep",
    "grep",
    "Glob",
    "glob",
    "ListFiles",
    "list_files",
    "WebSearch",
    "web_search",
    "WebFetch",
    "web_fetch",
];
40
/// Number of most-recent compactable tool results to keep intact.
///
/// Claude Code uses 5 with a 60-minute gap threshold. We match their
/// keep-recent count.
///
/// `microcompact_session` clears nothing unless the number of eligible
/// results is strictly greater than this value, and it also bails out early
/// when the loaded history has fewer than `KEEP_RECENT + 2` messages.
const KEEP_RECENT: usize = 5;
46
/// Minimum idle gap (in seconds) since the last assistant message before
/// microcompact fires. During active tool use the model needs those results;
/// clearing them mid-turn is wasteful and confusing.
///
/// 5 minutes = user went for coffee, came back, sent a new message.
/// Claude Code uses 60 minutes (tied to Anthropic's prompt cache TTL).
/// We use a shorter gap because koda has no server-side cache to protect.
///
/// The comparison is strict: a gap below this value skips the pass, while a
/// gap equal to or above it lets the pass proceed.
const GAP_THRESHOLD_SECS: i64 = 300;
55
/// Minimum token size for a tool result to be worth clearing.
/// Don't bother clearing tiny results — the overhead of the stub is comparable.
///
/// Results estimated below this size are dropped from the candidate list
/// entirely, so they also do not count toward `KEEP_RECENT`.
const MIN_TOKENS_TO_CLEAR: usize = 50;
59
/// Result of a microcompact pass.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare whole results
/// (e.g. with `assert_eq!`) instead of checking each field separately.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MicrocompactResult {
    /// Number of tool results cleared.
    pub cleared: usize,
    /// Estimated tokens saved: the sum of the estimated token sizes of the
    /// cleared contents (the size of the replacement stub is not subtracted).
    pub tokens_saved: usize,
}
68
69/// Run microcompact on a session — clear old compactable tool results.
70///
71/// Only fires when the gap since the last assistant message exceeds
72/// `GAP_THRESHOLD_SECS`. Returns `None` if the trigger doesn't fire
73/// or nothing was cleared.
74pub async fn microcompact_session(
75    db: &Database,
76    session_id: &str,
77) -> Result<Option<MicrocompactResult>> {
78    // Check the time-based trigger first — skip the heavy scan if idle gap
79    // hasn't been reached.
80    let gap = db.seconds_since_last_assistant(session_id).await?;
81    match gap {
82        None => return Ok(None), // No assistant messages yet.
83        Some(s) if s < GAP_THRESHOLD_SECS => return Ok(None),
84        _ => {} // Gap exceeded — proceed.
85    }
86
87    let history = db.load_context(session_id).await?;
88    if history.len() < KEEP_RECENT + 2 {
89        return Ok(None);
90    }
91
92    // Build tool_call_id → tool_name map from assistant messages.
93    let id_to_tool = build_tool_id_map(&history);
94
95    // Collect compactable tool result message IDs in chronological order.
96    let compactable: Vec<CompactableResult> = history
97        .iter()
98        .filter_map(|msg| {
99            if msg.role != Role::Tool {
100                return None;
101            }
102            let tool_call_id = msg.tool_call_id.as_deref()?;
103            let tool_name = id_to_tool.get(tool_call_id)?;
104            if !is_compactable(tool_name) {
105                return None;
106            }
107            // Skip already-cleared results.
108            let content = msg.content.as_deref().unwrap_or("");
109            if content == CLEARED_MESSAGE {
110                return None;
111            }
112            let tokens = estimate_tokens(content);
113            if tokens < MIN_TOKENS_TO_CLEAR {
114                return None;
115            }
116            Some(CompactableResult {
117                message_id: msg.id,
118                tokens,
119            })
120        })
121        .collect();
122
123    if compactable.len() <= KEEP_RECENT {
124        return Ok(None);
125    }
126
127    // Keep the last KEEP_RECENT, clear the rest.
128    let to_clear = &compactable[..compactable.len() - KEEP_RECENT];
129
130    let mut tokens_saved = 0usize;
131    let mut cleared = 0usize;
132
133    for batch in to_clear.chunks(100) {
134        let ids: Vec<i64> = batch.iter().map(|c| c.message_id).collect();
135        db.clear_message_content(&ids, CLEARED_MESSAGE).await?;
136        tokens_saved += batch.iter().map(|c| c.tokens).sum::<usize>();
137        cleared += batch.len();
138    }
139
140    if cleared == 0 {
141        return Ok(None);
142    }
143
144    tracing::info!("Microcompact: cleared {cleared} tool results, saved ~{tokens_saved} tokens");
145
146    Ok(Some(MicrocompactResult {
147        cleared,
148        tokens_saved,
149    }))
150}
151
152/// Identifies the best candidates for microcompact using context analysis.
153///
154/// Returns a human-readable hint for diagnostics (e.g., "Bash: ~8000 tok, Read: ~3000 tok").
155pub fn diagnosis(messages: &[Message]) -> Option<String> {
156    let analysis = context_analysis::analyze_context(messages);
157    let top = analysis.top_tool_results(3);
158    if top.is_empty() || analysis.total_tool_result_tokens() < 500 {
159        return None;
160    }
161
162    let parts: Vec<String> = top
163        .iter()
164        .filter(|(name, _)| is_compactable(name))
165        .map(|(name, tokens)| format!("{name}: ~{tokens} tok"))
166        .collect();
167
168    if parts.is_empty() {
169        return None;
170    }
171
172    Some(parts.join(", "))
173}
174
175// ---------------------------------------------------------------------------
176// Internal helpers
177// ---------------------------------------------------------------------------
178
/// A tool-result message that `microcompact_session` considers eligible for
/// clearing.
struct CompactableResult {
    /// Id of the tool-result message (taken from `Message::id`).
    message_id: i64,
    /// Estimated token size of the message content before clearing.
    tokens: usize,
}
183
184fn is_compactable(tool_name: &str) -> bool {
185    COMPACTABLE_TOOLS.contains(&tool_name)
186}
187
188fn estimate_tokens(content: &str) -> usize {
189    (content.len() as f64 / CHARS_PER_TOKEN) as usize
190}
191
192/// Build a map from tool_call_id → tool_name by scanning assistant messages.
193fn build_tool_id_map(messages: &[Message]) -> HashMap<String, String> {
194    let mut map = HashMap::new();
195    for msg in messages {
196        if msg.role == Role::Assistant
197            && let Some(ref tc_json) = msg.tool_calls
198            && let Ok(calls) = serde_json::from_str::<Vec<serde_json::Value>>(tc_json)
199        {
200            for call in &calls {
201                let id = call.get("id").and_then(|v| v.as_str()).unwrap_or_default();
202                let name = call
203                    .get("function_name")
204                    .or_else(|| call.get("name"))
205                    .and_then(|v| v.as_str())
206                    .unwrap_or("unknown");
207                if !id.is_empty() {
208                    map.insert(id.to_string(), name.to_string());
209                }
210            }
211        }
212    }
213    map
214}
215
/// Unit tests for the pure helpers plus one SQLite-backed integration test
/// for `microcompact_session`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::{Message, Role};

    /// Build a `Message` fixture with only the fields these tests care about;
    /// every other field is left empty / `None`.
    fn msg(
        id: i64,
        role: Role,
        content: Option<&str>,
        tool_calls: Option<&str>,
        tool_call_id: Option<&str>,
    ) -> Message {
        Message {
            id,
            session_id: String::new(),
            role,
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: tool_call_id.map(String::from),
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    /// Listed tools are compactable; agent/todo/user-interaction tools are not.
    #[test]
    fn test_is_compactable() {
        assert!(is_compactable("Read"));
        assert!(is_compactable("Bash"));
        assert!(is_compactable("Grep"));
        assert!(is_compactable("Glob"));
        assert!(is_compactable("WebSearch"));
        assert!(is_compactable("WebFetch"));
        assert!(!is_compactable("InvokeAgent"));
        assert!(!is_compactable("TodoWrite"));
        assert!(!is_compactable("AskUser"));
    }

    /// Several calls inside one assistant message are all mapped by id.
    #[test]
    fn test_build_tool_id_map() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"},{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;
        let messages = vec![msg(1, Role::Assistant, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert_eq!(map.get("tc_1").unwrap(), "Read");
        assert_eq!(map.get("tc_2").unwrap(), "Bash");
    }

    /// A tool result whose content equals `CLEARED_MESSAGE` is filtered out.
    ///
    /// NOTE(review): this re-implements the "already cleared" filter inline
    /// instead of calling production code, and `_id_map` is never used — it
    /// does not actually exercise `microcompact_session`.
    #[test]
    fn test_already_cleared_skipped() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some(CLEARED_MESSAGE), None, Some("tc_1")),
        ];
        let _id_map = build_tool_id_map(&messages);
        let compactable: Vec<_> = messages
            .iter()
            .filter(|m| m.role == Role::Tool)
            .filter(|m| {
                let content = m.content.as_deref().unwrap_or("");
                content != CLEARED_MESSAGE
            })
            .collect();
        assert!(compactable.is_empty());
    }

    /// Two large compactable results produce a hint naming at least one of them.
    #[test]
    fn test_diagnosis_with_results() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;
        let long = "x".repeat(2000);
        let messages = vec![
            msg(1, Role::User, Some("hi"), None, None),
            msg(2, Role::Assistant, None, Some(tc1), None),
            msg(3, Role::Tool, Some(&long), None, Some("tc_1")),
            msg(4, Role::Assistant, None, Some(tc2), None),
            msg(5, Role::Tool, Some(&long), None, Some("tc_2")),
        ];
        let diag = diagnosis(&messages);
        assert!(diag.is_some());
        let text = diag.unwrap();
        assert!(text.contains("Read") || text.contains("Bash"));
    }

    /// A conversation without any tool results yields no hint.
    #[test]
    fn test_diagnosis_empty() {
        let messages = vec![
            msg(1, Role::User, Some("hi"), None, None),
            msg(2, Role::Assistant, Some("hello"), None, None),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    /// Integration test: verifies microcompact clears old results in a real SQLite DB,
    /// but only when the time-based trigger fires (last assistant msg is old enough).
    #[tokio::test]
    async fn test_microcompact_session_integration() {
        let tmp = tempfile::TempDir::new().unwrap();
        let db_path = tmp.path().join("test.db");
        let db = crate::db::Database::open(&db_path).await.unwrap();
        let session = db.create_session("default", tmp.path()).await.unwrap();

        // Must estimate to at least MIN_TOKENS_TO_CLEAR tokens, otherwise the
        // results would not be eligible and the `cleared == 3` check would fail.
        let long_content = "x".repeat(500);

        // Insert KEEP_RECENT + 3 compactable tool calls (Read).
        for i in 0..(KEEP_RECENT + 3) {
            let tc_id = format!("tc_{i}");
            let tc_json =
                format!(r#"[{{"id":"{tc_id}","function_name":"Read","arguments":"{{}}"}}]"#);
            let mid = db
                .insert_message(&session, &Role::Assistant, None, Some(&tc_json), None, None)
                .await
                .unwrap();
            // Mark complete — these represent finished turns that load_context must see.
            db.mark_message_complete(mid).await.unwrap();
            db.insert_message(
                &session,
                &Role::Tool,
                Some(&long_content),
                None,
                Some(&tc_id),
                None,
            )
            .await
            .unwrap();
        }

        // Should NOT trigger — last assistant message is fresh (just inserted).
        let result = microcompact_session(&db, &session).await.unwrap();
        assert!(result.is_none(), "should not trigger for fresh messages");

        // Backdate the last assistant message so the time-based trigger fires.
        sqlx::query(
            "UPDATE messages SET created_at = datetime('now', '-10 minutes') \
             WHERE session_id = ? AND role = 'assistant' \
             AND id = (SELECT MAX(id) FROM messages WHERE session_id = ? AND role = 'assistant')",
        )
        .bind(&session)
        .bind(&session)
        .execute(db.pool())
        .await
        .unwrap();

        // NOW it should trigger.
        let result = microcompact_session(&db, &session).await.unwrap();
        assert!(result.is_some(), "should trigger after gap threshold");
        let mc = result.unwrap();
        assert_eq!(mc.cleared, 3); // 3 oldest should be cleared
        assert!(mc.tokens_saved > 0);

        // Verify: load context and check that old results are stubs.
        let history = db.load_context(&session).await.unwrap();
        let tool_msgs: Vec<_> = history.iter().filter(|m| m.role == Role::Tool).collect();

        // First 3 should be cleared
        for m in &tool_msgs[..3] {
            assert_eq!(m.content.as_deref().unwrap(), CLEARED_MESSAGE);
        }
        // Last KEEP_RECENT should be intact
        for m in &tool_msgs[3..] {
            assert_eq!(m.content.as_deref().unwrap(), long_content);
        }

        // Run again — should be idempotent (nothing more to clear)
        let result2 = microcompact_session(&db, &session).await.unwrap();
        assert!(result2.is_none());
    }

    // ── estimate_tokens ───────────────────────────────────────────────

    /// Longer content must never estimate to fewer tokens than short content.
    #[test]
    fn test_estimate_tokens_proportional_to_chars() {
        let short = estimate_tokens("hello");
        let long = estimate_tokens(&"x".repeat(400));
        assert!(long > short, "more chars should estimate more tokens");
    }

    /// Empty content estimates to zero tokens.
    #[test]
    fn test_estimate_tokens_empty_string() {
        assert_eq!(estimate_tokens(""), 0);
    }

    /// A two-character result falls below the clearing threshold.
    #[test]
    fn test_estimate_tokens_below_min_threshold() {
        // Content shorter than MIN_TOKENS_TO_CLEAR chars should estimate < threshold.
        let tiny = "hi";
        let tokens = estimate_tokens(tiny);
        assert!(
            tokens < MIN_TOKENS_TO_CLEAR,
            "tiny content ({tokens} tokens) should be below MIN_TOKENS_TO_CLEAR ({MIN_TOKENS_TO_CLEAR})"
        );
    }

    // ── is_compactable ──────────────────────────────────────────────

    /// Write results are never compactable, in either spelling.
    #[test]
    fn test_is_not_compactable_write() {
        assert!(!is_compactable("Write"));
        assert!(!is_compactable("write"));
    }

    /// Edit results are never compactable, in either spelling.
    #[test]
    fn test_is_not_compactable_edit() {
        assert!(!is_compactable("Edit"));
        assert!(!is_compactable("edit"));
    }

    /// Unlisted tool names — including the empty string — are not compactable.
    #[test]
    fn test_is_not_compactable_unknown_tool() {
        assert!(!is_compactable("FancyCustomTool"));
        assert!(!is_compactable(""));
    }

    // ── diagnosis ───────────────────────────────────────────────────

    #[test]
    fn test_diagnosis_returns_none_below_token_threshold() {
        // A small tool result (<500 tokens) should not trigger diagnosis.
        let tc = r#"[{"id":"tc_1","function_name":"Bash","arguments":"{}"}]"#;
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some("tiny result"), None, Some("tc_1")),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    #[test]
    fn test_diagnosis_includes_compactable_tools_only() {
        // Write tool results should NOT appear in diagnosis even if large.
        let tc_write = r#"[{"id":"tc_w","function_name":"Write","arguments":"{}"}]"#;
        let tc_read = r#"[{"id":"tc_r","function_name":"Read","arguments":"{}"}]"#;
        let big = "X".repeat(3000);
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc_write), None),
            msg(2, Role::Tool, Some(&big), None, Some("tc_w")),
            msg(3, Role::Assistant, None, Some(tc_read), None),
            msg(4, Role::Tool, Some(&big), None, Some("tc_r")),
        ];
        let d = diagnosis(&messages);
        assert!(d.is_some());
        let text = d.unwrap();
        assert!(
            !text.contains("Write"),
            "Write should not appear in diagnosis"
        );
        assert!(text.contains("Read"), "Read should appear in diagnosis");
    }

    #[test]
    fn test_diagnosis_returns_none_when_all_tools_non_compactable() {
        // Only Write results — nothing compactable to diagnose.
        let tc = r#"[{"id":"tc_w","function_name":"Write","arguments":"{}"}]"#;
        let big = "W".repeat(3000);
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some(&big), None, Some("tc_w")),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    // ── build_tool_id_map edge cases ──────────────────────────────────

    #[test]
    fn test_build_tool_id_map_accepts_name_key_variant() {
        // Some providers emit "name" instead of "function_name".
        let tc = r#"[{"id":"tc_x","name":"Grep","arguments":"{}"}]"#;
        let messages = vec![msg(1, Role::Assistant, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert_eq!(map.get("tc_x").map(|s| s.as_str()), Some("Grep"));
    }

    #[test]
    fn test_build_tool_id_map_ignores_non_assistant_messages() {
        let tc = r#"[{"id":"tc_y","function_name":"Bash","arguments":"{}"}]"#;
        // Tool role message with tool_calls JSON — should be ignored.
        let messages = vec![msg(1, Role::Tool, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert!(map.is_empty());
    }
}