// koda_core/microcompact.rs
1//! Microcompact — lightweight tool result aging without full compaction.
2//!
3//! Replaces old tool result content with a stub (`[Old tool result content cleared]`)
4//! directly in the database. No LLM call, no API cost — just a SQL UPDATE.
5//!
6//! **Time-based trigger**: only fires when the gap since the last assistant
7//! message exceeds a threshold (default 5 minutes). This prevents aggressive
8//! clearing during active tool use and matches Claude Code's pattern where
9//! microcompact only runs when the prompt cache has gone cold.
10//!
11//! Inspired by Claude Code's `microCompact.ts`.
12
13use crate::context_analysis;
14use crate::db::Database;
15use crate::inference_helpers::CHARS_PER_TOKEN;
16use crate::persistence::{Message, Persistence, Role};
17use anyhow::Result;
18use std::collections::HashMap;
19
/// Stub text that replaces cleared tool results.
///
/// Already-cleared rows are detected by comparing content verbatim against
/// this string, so its exact spelling must stay stable across versions.
pub const CLEARED_MESSAGE: &str = "[Old tool result content cleared]";
22
/// Tools whose results can be safely cleared (output is re-obtainable).
///
/// **Canonical names only.**  `is_compactable` normalizes input through
/// `tool_normalize::normalize_tool_name` before lookup, so any spelling
/// the model emits — PascalCase, snake_case, camelCase, or any registered
/// alias — routes to the same canonical entry here.  This means:
///
/// 1. **No dual-case redundancy.**  Adding `"read"` next to `"Read"` is
///    pointless; the lookup canonicalizes first.
/// 2. **Spellings must match `tool_normalize::CANONICAL`** exactly.
///    For example, the canonical name for the file-listing tool is
///    `"List"`, not `"ListFiles"`.  The `compactable_tools_are_canonical`
///    test enforces this at test time — if an entry here ever drifts
///    out of sync with `CANONICAL`, the test breaks loudly.
///
/// History: this constant used to list both PascalCase and snake_case
/// variants of every entry, which (a) was redundant after #1075 wired
/// `normalize_tool_calls` into `inference.rs` before persistence, and
/// (b) hid a latent bug — `"ListFiles"` is not the canonical name, so
/// `List` results were never compacted.  Fixed in #1083.
const COMPACTABLE_TOOLS: &[&str] = &[
    "Read",
    "Bash",
    "Grep",
    "Glob",
    "List",
    "WebSearch",
    "WebFetch",
];
52
/// Number of most-recent compactable tool results to keep intact.
///
/// Claude Code uses 5 with a 60-minute gap threshold. We match their
/// keep-recent count (the gap threshold differs — see `GAP_THRESHOLD_SECS`).
const KEEP_RECENT: usize = 5;
58
/// Minimum idle gap (in seconds) since the last assistant message before
/// microcompact fires. During active tool use the model needs those results;
/// clearing them mid-turn is wasteful and confusing.
///
/// 5 minutes = user went for coffee, came back, sent a new message.
/// Claude Code uses 60 minutes (tied to Anthropic's prompt cache TTL).
/// We use a shorter gap because koda has no server-side cache to protect.
const GAP_THRESHOLD_SECS: i64 = 300;
67
/// Minimum token size (estimated) for a tool result to be worth clearing.
/// Don't bother clearing tiny results — the overhead of the stub is comparable.
const MIN_TOKENS_TO_CLEAR: usize = 50;
71
/// Result of a microcompact pass.
///
/// Both fields are plain counters, so the type is `Copy` and supports
/// direct equality comparison in tests and callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MicrocompactResult {
    /// Number of tool results cleared.
    pub cleared: usize,
    /// Estimated tokens saved (char-count heuristic, not a real tokenizer).
    pub tokens_saved: usize,
}
80
81/// Run microcompact on a session — clear old compactable tool results.
82///
83/// Only fires when the gap since the last assistant message exceeds
84/// `GAP_THRESHOLD_SECS`. Returns `None` if the trigger doesn't fire
85/// or nothing was cleared.
86pub async fn microcompact_session(
87    db: &Database,
88    session_id: &str,
89) -> Result<Option<MicrocompactResult>> {
90    // Check the time-based trigger first — skip the heavy scan if idle gap
91    // hasn't been reached.
92    let gap = db.seconds_since_last_assistant(session_id).await?;
93    match gap {
94        None => return Ok(None), // No assistant messages yet.
95        Some(s) if s < GAP_THRESHOLD_SECS => return Ok(None),
96        _ => {} // Gap exceeded — proceed.
97    }
98
99    let history = db.load_context(session_id).await?;
100    if history.len() < KEEP_RECENT + 2 {
101        return Ok(None);
102    }
103
104    // Build tool_call_id → tool_name map from assistant messages.
105    let id_to_tool = build_tool_id_map(&history);
106
107    // Collect compactable tool result message IDs in chronological order.
108    let compactable: Vec<CompactableResult> = history
109        .iter()
110        .filter_map(|msg| {
111            if msg.role != Role::Tool {
112                return None;
113            }
114            let tool_call_id = msg.tool_call_id.as_deref()?;
115            let tool_name = id_to_tool.get(tool_call_id)?;
116            if !is_compactable(tool_name) {
117                return None;
118            }
119            // Skip already-cleared results.
120            let content = msg.content.as_deref().unwrap_or("");
121            if content == CLEARED_MESSAGE {
122                return None;
123            }
124            let tokens = estimate_tokens(content);
125            if tokens < MIN_TOKENS_TO_CLEAR {
126                return None;
127            }
128            Some(CompactableResult {
129                message_id: msg.id,
130                tokens,
131            })
132        })
133        .collect();
134
135    if compactable.len() <= KEEP_RECENT {
136        return Ok(None);
137    }
138
139    // Keep the last KEEP_RECENT, clear the rest.
140    let to_clear = &compactable[..compactable.len() - KEEP_RECENT];
141
142    let mut tokens_saved = 0usize;
143    let mut cleared = 0usize;
144
145    for batch in to_clear.chunks(100) {
146        let ids: Vec<i64> = batch.iter().map(|c| c.message_id).collect();
147        db.clear_message_content(&ids, CLEARED_MESSAGE).await?;
148        tokens_saved += batch.iter().map(|c| c.tokens).sum::<usize>();
149        cleared += batch.len();
150    }
151
152    if cleared == 0 {
153        return Ok(None);
154    }
155
156    tracing::info!("Microcompact: cleared {cleared} tool results, saved ~{tokens_saved} tokens");
157
158    Ok(Some(MicrocompactResult {
159        cleared,
160        tokens_saved,
161    }))
162}
163
164/// Identifies the best candidates for microcompact using context analysis.
165///
166/// Returns a human-readable hint for diagnostics (e.g., "Bash: ~8000 tok, Read: ~3000 tok").
167pub fn diagnosis(messages: &[Message]) -> Option<String> {
168    let analysis = context_analysis::analyze_context(messages);
169    let top = analysis.top_tool_results(3);
170    if top.is_empty() || analysis.total_tool_result_tokens() < 500 {
171        return None;
172    }
173
174    let parts: Vec<String> = top
175        .iter()
176        .filter(|(name, _)| is_compactable(name))
177        .map(|(name, tokens)| format!("{name}: ~{tokens} tok"))
178        .collect();
179
180    if parts.is_empty() {
181        return None;
182    }
183
184    Some(parts.join(", "))
185}
186
187// ---------------------------------------------------------------------------
188// Internal helpers
189// ---------------------------------------------------------------------------
190
/// A tool result row eligible for clearing in the current pass.
struct CompactableResult {
    /// Database row id of the tool result message.
    message_id: i64,
    /// Estimated token size of the current content.
    tokens: usize,
}
195
196fn is_compactable(tool_name: &str) -> bool {
197    // Canonicalize the spelling first so PascalCase / snake_case /
198    // camelCase / any registered alias all hit the same entry.  See the
199    // doc comment on `COMPACTABLE_TOOLS` for the rationale.  Cheap:
200    // `normalize_tool_name` is a single `HashMap::get` on a lowercased
201    // input, falling back to pass-through for unknown names.
202    let canonical = crate::tool_normalize::normalize_tool_name(tool_name);
203    COMPACTABLE_TOOLS.contains(&canonical.as_str())
204}
205
206fn estimate_tokens(content: &str) -> usize {
207    (content.len() as f64 / CHARS_PER_TOKEN) as usize
208}
209
210/// Build a map from tool_call_id → tool_name by scanning assistant messages.
211fn build_tool_id_map(messages: &[Message]) -> HashMap<String, String> {
212    let mut map = HashMap::new();
213    for msg in messages {
214        if msg.role == Role::Assistant
215            && let Some(ref tc_json) = msg.tool_calls
216            && let Ok(calls) = serde_json::from_str::<Vec<serde_json::Value>>(tc_json)
217        {
218            for call in &calls {
219                let id = call.get("id").and_then(|v| v.as_str()).unwrap_or_default();
220                let name = call
221                    .get("function_name")
222                    .or_else(|| call.get("name"))
223                    .and_then(|v| v.as_str())
224                    .unwrap_or("unknown");
225                if !id.is_empty() {
226                    map.insert(id.to_string(), name.to_string());
227                }
228            }
229        }
230    }
231    map
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::{Message, Role};

    /// Test helper: build a `Message` with only the fields microcompact
    /// reads (id, role, content, tool_calls, tool_call_id); everything
    /// else is defaulted to `None`/empty.
    fn msg(
        id: i64,
        role: Role,
        content: Option<&str>,
        tool_calls: Option<&str>,
        tool_call_id: Option<&str>,
    ) -> Message {
        Message {
            id,
            session_id: String::new(),
            role,
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: tool_call_id.map(String::from),
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_is_compactable() {
        assert!(is_compactable("Read"));
        assert!(is_compactable("Bash"));
        assert!(is_compactable("Grep"));
        assert!(is_compactable("Glob"));
        assert!(is_compactable("List"));
        assert!(is_compactable("WebSearch"));
        assert!(is_compactable("WebFetch"));
        assert!(!is_compactable("InvokeAgent"));
        assert!(!is_compactable("TodoWrite"));
        assert!(!is_compactable("AskUser"));
    }

    /// Regression for the latent bug discovered in #1083: post-#1075,
    /// every `list_files` / `ListFiles` call is normalized to `"List"`
    /// before persistence, so the old `COMPACTABLE_TOOLS` entries
    /// (`"ListFiles"`, `"list_files"`) were dead code and `List`
    /// results were never compacted.
    #[test]
    fn test_is_compactable_list_canonical_name() {
        assert!(
            is_compactable("List"),
            "canonical name 'List' must be compactable \
             (regression for the #1083 latent bug)"
        );
    }

    /// Belt-and-suspenders: legacy DB rows from before #1075 may still
    /// carry snake_case or alternate PascalCase spellings.  Defensive
    /// normalization at the lookup site means any registered alias
    /// hits the same canonical entry, so old sessions stay compactable
    /// without a DB migration.
    #[test]
    fn test_is_compactable_accepts_aliases_for_legacy_rows() {
        // snake_case (legacy pre-#1075 spelling)
        assert!(is_compactable("list_files"));
        assert!(is_compactable("read_file"));
        assert!(is_compactable("web_fetch"));
        assert!(is_compactable("web_search"));
        assert!(is_compactable("grep_search"));
        // alternate PascalCase that the audit's old constant listed
        assert!(is_compactable("ListFiles"));
        // single-letter aliases
        assert!(is_compactable("ls"));
        assert!(is_compactable("rg"));
    }

    /// Compile-time-ish drift guard: every entry in `COMPACTABLE_TOOLS`
    /// must already be its own canonical form, otherwise the lookup is
    /// dead code (the input gets normalized before comparison).  Catches
    /// the exact class of bug #1083 fixed: an entry like `"ListFiles"`
    /// that doesn't match `tool_normalize::CANONICAL`.
    #[test]
    fn test_compactable_tools_are_canonical() {
        for &name in COMPACTABLE_TOOLS {
            let canonical = crate::tool_normalize::normalize_tool_name(name);
            assert_eq!(
                canonical, name,
                "COMPACTABLE_TOOLS entry {name:?} is not canonical — \
                 it normalizes to {canonical:?}.  Either add it to \
                 `tool_normalize::CANONICAL` or use the canonical spelling."
            );
        }
    }

    #[test]
    fn test_build_tool_id_map() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"},{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;
        let messages = vec![msg(1, Role::Assistant, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert_eq!(map.get("tc_1").unwrap(), "Read");
        assert_eq!(map.get("tc_2").unwrap(), "Bash");
    }

    /// Mirrors the already-cleared skip inside `microcompact_session`'s
    /// filter: a stubbed result must never be selected for clearing again.
    #[test]
    fn test_already_cleared_skipped() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some(CLEARED_MESSAGE), None, Some("tc_1")),
        ];
        let _id_map = build_tool_id_map(&messages);
        let compactable: Vec<_> = messages
            .iter()
            .filter(|m| m.role == Role::Tool)
            .filter(|m| {
                let content = m.content.as_deref().unwrap_or("");
                content != CLEARED_MESSAGE
            })
            .collect();
        assert!(compactable.is_empty());
    }

    #[test]
    fn test_diagnosis_with_results() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;
        let long = "x".repeat(2000);
        let messages = vec![
            msg(1, Role::User, Some("hi"), None, None),
            msg(2, Role::Assistant, None, Some(tc1), None),
            msg(3, Role::Tool, Some(&long), None, Some("tc_1")),
            msg(4, Role::Assistant, None, Some(tc2), None),
            msg(5, Role::Tool, Some(&long), None, Some("tc_2")),
        ];
        let diag = diagnosis(&messages);
        assert!(diag.is_some());
        let text = diag.unwrap();
        assert!(text.contains("Read") || text.contains("Bash"));
    }

    #[test]
    fn test_diagnosis_empty() {
        let messages = vec![
            msg(1, Role::User, Some("hi"), None, None),
            msg(2, Role::Assistant, Some("hello"), None, None),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    /// Integration test: verifies microcompact clears old results in a real SQLite DB,
    /// but only when the time-based trigger fires (last assistant msg is old enough).
    #[tokio::test]
    async fn test_microcompact_session_integration() {
        let tmp = tempfile::TempDir::new().unwrap();
        let db_path = tmp.path().join("test.db");
        let db = crate::db::Database::open(&db_path).await.unwrap();
        let session = db.create_session("default", tmp.path()).await.unwrap();

        // 500 chars ≈ well above MIN_TOKENS_TO_CLEAR, so each result qualifies.
        let long_content = "x".repeat(500);

        // Insert KEEP_RECENT + 3 compactable tool calls (Read).
        for i in 0..(KEEP_RECENT + 3) {
            let tc_id = format!("tc_{i}");
            let tc_json =
                format!(r#"[{{"id":"{tc_id}","function_name":"Read","arguments":"{{}}"}}]"#);
            let mid = db
                .insert_message(&session, &Role::Assistant, None, Some(&tc_json), None, None)
                .await
                .unwrap();
            // Mark complete — these represent finished turns that load_context must see.
            db.mark_message_complete(mid).await.unwrap();
            db.insert_message(
                &session,
                &Role::Tool,
                Some(&long_content),
                None,
                Some(&tc_id),
                None,
            )
            .await
            .unwrap();
        }

        // Should NOT trigger — last assistant message is fresh (just inserted).
        let result = microcompact_session(&db, &session).await.unwrap();
        assert!(result.is_none(), "should not trigger for fresh messages");

        // Backdate the last assistant message so the time-based trigger fires.
        sqlx::query(
            "UPDATE messages SET created_at = datetime('now', '-10 minutes') \
             WHERE session_id = ? AND role = 'assistant' \
             AND id = (SELECT MAX(id) FROM messages WHERE session_id = ? AND role = 'assistant')",
        )
        .bind(&session)
        .bind(&session)
        .execute(db.pool())
        .await
        .unwrap();

        // NOW it should trigger.
        let result = microcompact_session(&db, &session).await.unwrap();
        assert!(result.is_some(), "should trigger after gap threshold");
        let mc = result.unwrap();
        assert_eq!(mc.cleared, 3); // 3 oldest should be cleared
        assert!(mc.tokens_saved > 0);

        // Verify: load context and check that old results are stubs.
        let history = db.load_context(&session).await.unwrap();
        let tool_msgs: Vec<_> = history.iter().filter(|m| m.role == Role::Tool).collect();

        // First 3 should be cleared
        for m in &tool_msgs[..3] {
            assert_eq!(m.content.as_deref().unwrap(), CLEARED_MESSAGE);
        }
        // Last KEEP_RECENT should be intact
        for m in &tool_msgs[3..] {
            assert_eq!(m.content.as_deref().unwrap(), long_content);
        }

        // Run again — should be idempotent (nothing more to clear)
        let result2 = microcompact_session(&db, &session).await.unwrap();
        assert!(result2.is_none());
    }

    // ── estimate_tokens ───────────────────────────────────────────────

    #[test]
    fn test_estimate_tokens_proportional_to_chars() {
        let short = estimate_tokens("hello");
        let long = estimate_tokens(&"x".repeat(400));
        assert!(long > short, "more chars should estimate more tokens");
    }

    #[test]
    fn test_estimate_tokens_empty_string() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn test_estimate_tokens_below_min_threshold() {
        // Content shorter than MIN_TOKENS_TO_CLEAR chars should estimate < threshold.
        let tiny = "hi";
        let tokens = estimate_tokens(tiny);
        assert!(
            tokens < MIN_TOKENS_TO_CLEAR,
            "tiny content ({tokens} tokens) should be below MIN_TOKENS_TO_CLEAR ({MIN_TOKENS_TO_CLEAR})"
        );
    }

    // ── is_compactable ──────────────────────────────────────────────

    #[test]
    fn test_is_not_compactable_write() {
        assert!(!is_compactable("Write"));
        assert!(!is_compactable("write"));
    }

    #[test]
    fn test_is_not_compactable_edit() {
        assert!(!is_compactable("Edit"));
        assert!(!is_compactable("edit"));
    }

    #[test]
    fn test_is_not_compactable_unknown_tool() {
        assert!(!is_compactable("FancyCustomTool"));
        assert!(!is_compactable(""));
    }

    // ── diagnosis ───────────────────────────────────────────────────

    #[test]
    fn test_diagnosis_returns_none_below_token_threshold() {
        // A small tool result (<500 tokens) should not trigger diagnosis.
        let tc = r#"[{"id":"tc_1","function_name":"Bash","arguments":"{}"}]"#;
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some("tiny result"), None, Some("tc_1")),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    #[test]
    fn test_diagnosis_includes_compactable_tools_only() {
        // Write tool results should NOT appear in diagnosis even if large.
        let tc_write = r#"[{"id":"tc_w","function_name":"Write","arguments":"{}"}]"#;
        let tc_read = r#"[{"id":"tc_r","function_name":"Read","arguments":"{}"}]"#;
        let big = "X".repeat(3000);
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc_write), None),
            msg(2, Role::Tool, Some(&big), None, Some("tc_w")),
            msg(3, Role::Assistant, None, Some(tc_read), None),
            msg(4, Role::Tool, Some(&big), None, Some("tc_r")),
        ];
        let d = diagnosis(&messages);
        assert!(d.is_some());
        let text = d.unwrap();
        assert!(
            !text.contains("Write"),
            "Write should not appear in diagnosis"
        );
        assert!(text.contains("Read"), "Read should appear in diagnosis");
    }

    #[test]
    fn test_diagnosis_returns_none_when_all_tools_non_compactable() {
        // Only Write results — nothing compactable to diagnose.
        let tc = r#"[{"id":"tc_w","function_name":"Write","arguments":"{}"}]"#;
        let big = "W".repeat(3000);
        let messages = vec![
            msg(1, Role::Assistant, None, Some(tc), None),
            msg(2, Role::Tool, Some(&big), None, Some("tc_w")),
        ];
        assert!(diagnosis(&messages).is_none());
    }

    // ── build_tool_id_map edge cases ──────────────────────────────────

    #[test]
    fn test_build_tool_id_map_accepts_name_key_variant() {
        // Some providers emit "name" instead of "function_name".
        let tc = r#"[{"id":"tc_x","name":"Grep","arguments":"{}"}]"#;
        let messages = vec![msg(1, Role::Assistant, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert_eq!(map.get("tc_x").map(|s| s.as_str()), Some("Grep"));
    }

    #[test]
    fn test_build_tool_id_map_ignores_non_assistant_messages() {
        let tc = r#"[{"id":"tc_y","function_name":"Bash","arguments":"{}"}]"#;
        // Tool role message with tool_calls JSON — should be ignored.
        let messages = vec![msg(1, Role::Tool, None, Some(tc), None)];
        let map = build_tool_id_map(&messages);
        assert!(map.is_empty());
    }
}