Skip to main content

mermaid_cli/domain/
compaction.rs

1//! Conversation context compaction.
2//!
3//! The reducer/effect boundary treats compaction as a first-class
4//! operation: effects generate a checkpoint summary, the reducer swaps
5//! the model-visible history, and persistence archives the removed raw
6//! messages. This keeps compaction observable instead of hiding it inside
7//! a provider adapter.
8
9use chrono::{DateTime, Local};
10use serde::{Deserialize, Serialize};
11
12use crate::constants::{
13    COMPACTION_AUTO_THRESHOLD_PERCENT, COMPACTION_MAX_RESPONSE_RESERVE_TOKENS,
14    COMPACTION_MIN_RESPONSE_RESERVE_TOKENS, COMPACTION_SUMMARIZER_INPUT_TOKEN_BUDGET,
15    COMPACTION_SUMMARY_MAX_TOKENS, COMPACTION_TAIL_TOKEN_BUDGET, COMPACTION_TAIL_TURNS,
16    COMPACTION_TOOL_OUTPUT_MAX_CHARS,
17};
18use crate::models::{ChatMessage, ChatMessageKind, MessageRole, ReasoningLevel, TokenUsage};
19
20use super::cmd::ChatRequest;
21use super::state::ContextUsageSnapshot;
22
/// Heading text written at the top of every checkpoint message by
/// `build_replacement_messages`; `prepare_compaction` also searches message
/// content for it to recognise an earlier checkpoint.
const CHECKPOINT_MARKER: &str = "MERMAID CONTEXT CHECKPOINT";
24
/// What initiated a compaction. Serialized with snake_case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompactionTrigger {
    /// Used by `CompactionRequest::manual`.
    Manual,
    /// Presumably raised when `should_auto_compact` passes — confirm at the call site.
    AutoThreshold,
    /// Presumably raised when retrying after a provider context-limit failure — confirm
    /// at the call site.
    ContextLimitRetry,
}
32
33impl CompactionTrigger {
34    pub fn as_str(self) -> &'static str {
35        match self {
36            Self::Manual => "manual",
37            Self::AutoThreshold => "auto_threshold",
38            Self::ContextLimitRetry => "context_limit_retry",
39        }
40    }
41
42    pub fn label(self) -> &'static str {
43        match self {
44            Self::Manual => "manual",
45            Self::AutoThreshold => "automatic",
46            Self::ContextLimitRetry => "context-limit retry",
47        }
48    }
49}
50
/// Tunable limits that drive when and how a conversation is compacted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CompactionPolicy {
    /// When false, `should_auto_compact` always reports `AutoDisabled`.
    pub auto_enabled: bool,
    /// Usage percentage at or above which automatic compaction is due.
    pub auto_threshold_percent: u8,
    /// Number of most recent user turns kept verbatim in the preserved tail.
    pub tail_turns: usize,
    /// Estimated-token ceiling for the preserved tail; the tail is shortened
    /// one user turn at a time while it exceeds this.
    pub tail_token_budget: usize,
    /// Character cap applied to each tool message in the history excerpt
    /// (other roles get four times this cap).
    pub tool_output_max_chars: usize,
    /// `max_tokens` used for the summary and verification requests.
    pub summary_max_tokens: usize,
    /// Upper bound, in tokens, on the history excerpt sent to the summarizer.
    pub summarizer_input_token_budget: usize,
    /// Lower bound applied by `response_reserve`.
    pub min_response_reserve_tokens: usize,
    /// Upper bound applied by `response_reserve`.
    pub max_response_reserve_tokens: usize,
}
63
64impl Default for CompactionPolicy {
65    fn default() -> Self {
66        Self {
67            auto_enabled: true,
68            auto_threshold_percent: COMPACTION_AUTO_THRESHOLD_PERCENT,
69            tail_turns: COMPACTION_TAIL_TURNS,
70            tail_token_budget: COMPACTION_TAIL_TOKEN_BUDGET,
71            tool_output_max_chars: COMPACTION_TOOL_OUTPUT_MAX_CHARS,
72            summary_max_tokens: COMPACTION_SUMMARY_MAX_TOKENS,
73            summarizer_input_token_budget: COMPACTION_SUMMARIZER_INPUT_TOKEN_BUDGET,
74            min_response_reserve_tokens: COMPACTION_MIN_RESPONSE_RESERVE_TOKENS,
75            max_response_reserve_tokens: COMPACTION_MAX_RESPONSE_RESERVE_TOKENS,
76        }
77    }
78}
79
80impl CompactionPolicy {
81    pub fn response_reserve(self, request_max_tokens: usize) -> usize {
82        request_max_tokens
83            .max(self.min_response_reserve_tokens)
84            .min(self.max_response_reserve_tokens)
85    }
86}
87
/// Everything needed to run one compaction pass.
#[derive(Debug, Clone)]
pub struct CompactionRequest {
    /// The chat request whose `messages` are split into archived and preserved parts.
    pub chat: ChatRequest,
    /// What initiated this compaction.
    pub trigger: CompactionTrigger,
    /// Optional caller-supplied instructions; `None` for automatic requests.
    /// Presumably forwarded as the summary "focus" — confirm at the call site.
    pub instructions: Option<String>,
    /// `true` for manual requests, `false` for automatic ones.
    /// NOTE(review): not read anywhere in this file — presumably lets the
    /// caller bypass threshold checks; confirm.
    pub force: bool,
    /// Limits applied while preparing the compaction.
    pub policy: CompactionPolicy,
}
96
97impl CompactionRequest {
98    pub fn manual(chat: ChatRequest, instructions: Option<String>) -> Self {
99        Self {
100            chat,
101            trigger: CompactionTrigger::Manual,
102            instructions,
103            force: true,
104            policy: CompactionPolicy::default(),
105        }
106    }
107
108    pub fn auto(chat: ChatRequest, trigger: CompactionTrigger) -> Self {
109        Self {
110            chat,
111            trigger,
112            instructions: None,
113            force: false,
114            policy: CompactionPolicy::default(),
115        }
116    }
117}
118
/// Serializable bookkeeping for one completed compaction. Several fields are
/// echoed into the checkpoint message text, its metadata and the receipt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionRecord {
    /// Compaction identifier, printed in the checkpoint as "Compaction id".
    pub id: String,
    /// What initiated the compaction.
    pub trigger: CompactionTrigger,
    /// Local timestamp, rendered as RFC 3339 in the checkpoint.
    pub created_at: DateTime<Local>,
    /// Token count before compaction, as shown in the receipt.
    pub before_tokens: usize,
    /// Token count after compaction, as shown in the receipt.
    pub after_tokens: usize,
    /// Number of messages removed from the model-visible history.
    pub archived_message_count: usize,
    /// Number of recent messages kept verbatim.
    pub preserved_message_count: usize,
    /// Presumably the token size of the generated summary — TODO confirm with the producer.
    pub summary_tokens: usize,
    /// Duration in seconds; the receipt prints it with one decimal place.
    pub duration_secs: f64,
    /// Optional focus text; defaults to `None` when absent from serialized input.
    #[serde(default)]
    pub focus: Option<String>,
    /// Optional archive location; defaults to `None` when absent from serialized input.
    #[serde(default)]
    pub archive_path: Option<String>,
}
135
/// Serializable container for the raw messages removed by a compaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionArchive {
    /// Archive identifier. Presumably matches `CompactionRecord::id` — confirm with the writer.
    pub id: String,
    /// Identifier of the conversation the messages came from.
    pub conversation_id: String,
    /// Local timestamp of the archive.
    pub created_at: DateTime<Local>,
    /// The archived messages.
    pub messages: Vec<ChatMessage>,
}
143
/// Outcome of a compaction pass. Not constructed in this file; field notes
/// beyond the types are assumptions to confirm with the producing effect.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// Bookkeeping for this compaction.
    pub record: CompactionRecord,
    /// New model-visible history (see `build_replacement_messages`).
    pub replacement_messages: Vec<ChatMessage>,
    /// Messages removed from the model-visible history.
    pub archived_messages: Vec<ChatMessage>,
    /// Context usage before compaction (presumably).
    pub before_snapshot: ContextUsageSnapshot,
    /// Context usage after compaction (presumably).
    pub after_snapshot: ContextUsageSnapshot,
    /// Token usage of the summarizer calls, if reported (see `combine_usage`).
    pub usage: Option<TokenUsage>,
}
153
/// Result of `prepare_compaction`: the history split plus summarizer inputs.
#[derive(Debug, Clone)]
pub struct PreparedCompaction {
    /// Older messages that will be replaced by the checkpoint summary.
    pub archived_messages: Vec<ChatMessage>,
    /// Recent tail of the conversation, kept verbatim.
    pub preserved_messages: Vec<ChatMessage>,
    /// Content of the most recent earlier checkpoint found among the archived
    /// messages, if any.
    pub previous_summary: Option<String>,
    /// Formatted, length-capped rendering of the archived messages that is
    /// embedded in the summary prompt.
    pub history_excerpt: String,
}
161
/// Reasons a compaction is not performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompactionSkip {
    /// The snapshot has no maximum token count, or it is zero.
    NoKnownContextLimit,
    /// The policy has automatic compaction turned off.
    AutoDisabled,
    /// Neither the usage percentage nor the remaining headroom calls for compaction.
    BelowThreshold,
    /// The history is too short or cannot be split into archived and preserved parts.
    NothingToCompact,
}
169
170impl std::fmt::Display for CompactionSkip {
171    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
172        match self {
173            Self::NoKnownContextLimit => write!(f, "model context limit is unknown"),
174            Self::AutoDisabled => write!(f, "automatic compaction is disabled"),
175            Self::BelowThreshold => write!(f, "context is below compaction threshold"),
176            Self::NothingToCompact => write!(f, "not enough history to compact"),
177        }
178    }
179}
180
181pub fn should_auto_compact(
182    snapshot: &ContextUsageSnapshot,
183    request: &ChatRequest,
184    policy: CompactionPolicy,
185) -> Result<(), CompactionSkip> {
186    if !policy.auto_enabled {
187        return Err(CompactionSkip::AutoDisabled);
188    }
189    let Some(max_tokens) = snapshot.max_tokens else {
190        return Err(CompactionSkip::NoKnownContextLimit);
191    };
192    if max_tokens == 0 {
193        return Err(CompactionSkip::NoKnownContextLimit);
194    }
195
196    let reserve = policy.response_reserve(request.max_tokens);
197    let over_percent = snapshot
198        .used_percent
199        .is_some_and(|p| p >= policy.auto_threshold_percent);
200    let low_remaining = snapshot
201        .remaining_tokens
202        .is_some_and(|remaining| remaining <= reserve);
203    if over_percent || low_remaining {
204        Ok(())
205    } else {
206        Err(CompactionSkip::BelowThreshold)
207    }
208}
209
210pub fn context_exceeds_hard_limit(
211    snapshot: &ContextUsageSnapshot,
212    request: &ChatRequest,
213    policy: CompactionPolicy,
214) -> bool {
215    let Some(max_tokens) = snapshot.max_tokens else {
216        return false;
217    };
218    let reserve = policy.response_reserve(request.max_tokens);
219    snapshot.used_tokens.saturating_add(reserve) >= max_tokens
220}
221
222pub fn prepare_compaction(
223    request: &CompactionRequest,
224    max_context_tokens: Option<usize>,
225) -> Result<PreparedCompaction, CompactionSkip> {
226    let messages = &request.chat.messages;
227    if messages.len() < 3 {
228        return Err(CompactionSkip::NothingToCompact);
229    }
230
231    let split =
232        tail_start_index(messages, request.policy).ok_or(CompactionSkip::NothingToCompact)?;
233    if split == 0 {
234        return Err(CompactionSkip::NothingToCompact);
235    }
236
237    let archived_messages = messages[..split].to_vec();
238    let preserved_messages = messages[split..].to_vec();
239    if archived_messages.is_empty() || preserved_messages.is_empty() {
240        return Err(CompactionSkip::NothingToCompact);
241    }
242
243    let previous_summary = archived_messages
244        .iter()
245        .rev()
246        .find(|m| {
247            m.kind == ChatMessageKind::ContextCheckpoint || m.content.contains(CHECKPOINT_MARKER)
248        })
249        .map(|m| m.content.clone());
250
251    let max_input_tokens = max_context_tokens
252        .map(|max| max.saturating_sub(request.policy.response_reserve(request.chat.max_tokens)))
253        .filter(|max| *max > 0)
254        .unwrap_or(request.policy.summarizer_input_token_budget)
255        .min(request.policy.summarizer_input_token_budget);
256    let max_chars = max_input_tokens.saturating_mul(4).max(4_000);
257    let history_excerpt = truncate_middle(
258        &format_history_excerpt(&archived_messages, request.policy),
259        max_chars,
260    );
261
262    Ok(PreparedCompaction {
263        archived_messages,
264        preserved_messages,
265        previous_summary,
266        history_excerpt,
267    })
268}
269
270pub fn build_summary_request(
271    base: &ChatRequest,
272    prepared: &PreparedCompaction,
273    focus: Option<&str>,
274    policy: CompactionPolicy,
275) -> ChatRequest {
276    ChatRequest {
277        model_id: base.model_id.clone(),
278        messages: vec![ChatMessage::user(summary_prompt(prepared, focus))],
279        system_prompt: compaction_system_prompt().to_string(),
280        instructions: None,
281        reasoning: compaction_reasoning(base.reasoning),
282        temperature: 0.0,
283        max_tokens: policy.summary_max_tokens,
284        tools: Vec::new(),
285    }
286}
287
288pub fn build_verification_request(
289    base: &ChatRequest,
290    prepared: &PreparedCompaction,
291    draft_summary: &str,
292    focus: Option<&str>,
293    policy: CompactionPolicy,
294) -> ChatRequest {
295    let prompt = format!(
296        "{}\n\n# Draft Summary\n{}\n\n# Verification Task\nCritically check the draft against the conversation excerpt. If it omitted specific file paths, commands, test results, tool results, user constraints, current state, or next steps, return an improved complete checkpoint. Otherwise return the draft unchanged. Return only the final checkpoint markdown.",
297        summary_prompt(prepared, focus),
298        draft_summary.trim()
299    );
300    ChatRequest {
301        model_id: base.model_id.clone(),
302        messages: vec![ChatMessage::user(prompt)],
303        system_prompt: compaction_system_prompt().to_string(),
304        instructions: None,
305        reasoning: compaction_reasoning(base.reasoning),
306        temperature: 0.0,
307        max_tokens: policy.summary_max_tokens,
308        tools: Vec::new(),
309    }
310}
311
312pub fn build_replacement_messages(
313    summary: &str,
314    prepared: &PreparedCompaction,
315    record: &CompactionRecord,
316) -> Vec<ChatMessage> {
317    let checkpoint = format!(
318        "# {}\n\nCompaction id: {}\nTrigger: {}\nCreated: {}\nArchived messages: {}\nPreserved messages: {}\n\n{}",
319        CHECKPOINT_MARKER,
320        record.id,
321        record.trigger.as_str(),
322        record.created_at.to_rfc3339(),
323        record.archived_message_count,
324        record.preserved_message_count,
325        summary.trim()
326    );
327    let mut user = ChatMessage::user(checkpoint);
328    user.kind = ChatMessageKind::ContextCheckpoint;
329    user.metadata = Some(serde_json::json!({
330        "compaction_id": record.id,
331        "trigger": record.trigger.as_str(),
332        "before_tokens": record.before_tokens,
333        "after_tokens": record.after_tokens,
334        "archived_message_count": record.archived_message_count,
335        "preserved_message_count": record.preserved_message_count,
336        "duration_secs": record.duration_secs,
337    }));
338
339    let mut assistant = ChatMessage::assistant(compaction_receipt(record));
340    assistant.kind = ChatMessageKind::ContextCheckpoint;
341    assistant.metadata = user.metadata.clone();
342
343    let mut messages = Vec::with_capacity(2 + prepared.preserved_messages.len());
344    messages.push(user);
345    messages.push(assistant);
346    messages.extend(prepared.preserved_messages.clone());
347    messages
348}
349
350pub fn compaction_receipt(record: &CompactionRecord) -> String {
351    format!(
352        "Context compacted: {} -> {} tokens, archived {} messages, preserved {} messages, took {:.1}s. I will continue from this checkpoint.",
353        format_compact_count(record.before_tokens),
354        format_compact_count(record.after_tokens),
355        record.archived_message_count,
356        record.preserved_message_count,
357        record.duration_secs
358    )
359}
360
361pub fn normalize_summary(text: &str) -> String {
362    let trimmed = text.trim();
363    if let Some(summary) = extract_tagged_summary(trimmed) {
364        return summary.trim().to_string();
365    }
366    trimmed.to_string()
367}
368
369pub fn combine_usage(a: Option<TokenUsage>, b: Option<TokenUsage>) -> Option<TokenUsage> {
370    match (a, b) {
371        (None, None) => None,
372        (Some(u), None) | (None, Some(u)) => Some(u),
373        (Some(mut left), Some(right)) => {
374            left.prompt_tokens = left.prompt_tokens.saturating_add(right.prompt_tokens);
375            left.completion_tokens = left
376                .completion_tokens
377                .saturating_add(right.completion_tokens);
378            left.total_tokens = left.total_tokens.saturating_add(right.total_tokens);
379            left.cached_input_tokens = left
380                .cached_input_tokens
381                .saturating_add(right.cached_input_tokens);
382            left.cache_creation_input_tokens = left
383                .cache_creation_input_tokens
384                .saturating_add(right.cache_creation_input_tokens);
385            left.reasoning_output_tokens = left
386                .reasoning_output_tokens
387                .saturating_add(right.reasoning_output_tokens);
388            Some(left)
389        },
390    }
391}
392
393pub fn estimate_messages_tokens(messages: &[ChatMessage]) -> usize {
394    messages.iter().map(estimate_message_tokens).sum()
395}
396
/// Formats a count compactly: plain digits below 1,000, one-decimal thousands
/// with a `k` suffix below a million, and one-decimal millions with an `M`
/// suffix otherwise.
///
/// Values just under a million that would round up to `1000.0k` are promoted
/// to the `M` form so the output never exceeds `999.9k`.
pub fn format_compact_count(value: usize) -> String {
    if value < 1_000 {
        return value.to_string();
    }
    if value < 1_000_000 {
        let thousands = format!("{:.1}", value as f64 / 1_000.0);
        // Rounding to one decimal can carry into a fourth integer digit
        // (e.g. 999_999 -> "1000.0"); fall through to the millions form then.
        if thousands != "1000.0" {
            return format!("{thousands}k");
        }
    }
    format!("{:.1}M", value as f64 / 1_000_000.0)
}
406
/// System prompt used for both the summary and the verification requests.
fn compaction_system_prompt() -> &'static str {
    const SYSTEM_PROMPT: &str = "You are performing context checkpoint compaction for Mermaid, a model-agnostic agentic coding CLI. Produce a faithful handoff summary for the next model call. Preserve exact file paths, commands, errors, tool results, user preferences, decisions, current state, and next steps. Do not invent facts. Be concise but complete.";
    SYSTEM_PROMPT
}
410
411fn compaction_reasoning(current: ReasoningLevel) -> ReasoningLevel {
412    match current {
413        ReasoningLevel::None | ReasoningLevel::Minimal => current,
414        _ => ReasoningLevel::Low,
415    }
416}
417
418fn summary_prompt(prepared: &PreparedCompaction, focus: Option<&str>) -> String {
419    let anchor = prepared
420        .previous_summary
421        .as_deref()
422        .map(|summary| {
423            format!(
424                "A previous checkpoint exists. Update it with the newer history, preserve still-true details, and remove stale details.\n\n<previous_checkpoint>\n{}\n</previous_checkpoint>",
425                summary.trim()
426            )
427        })
428        .unwrap_or_else(|| "Create a new checkpoint from the conversation history below.".to_string());
429
430    let focus = focus
431        .filter(|s| !s.trim().is_empty())
432        .map(|s| format!("\n# User Focus Instructions\n{}\n", s.trim()))
433        .unwrap_or_default();
434
435    format!(
436        "{anchor}{focus}\n# Required Output\nReturn exactly this Markdown structure and keep section order:\n\n## Goal\n- [single-sentence task summary]\n\n## User Preferences And Constraints\n- [preferences, constraints, mode, or \"(none)\"]\n\n## Project State\n- [repo/product state and important architecture facts]\n\n## Completed Work\n- [what has already been done]\n\n## Current Work\n- [what is actively in progress]\n\n## Key Decisions\n- [decision and rationale]\n\n## Critical Files And Symbols\n- [file path or symbol: why it matters]\n\n## Commands Tests And Results\n- [command/test/result/error]\n\n## Open Questions Or Risks\n- [risk/question/blocker]\n\n## Next Steps\n- [ordered next action]\n\nRules:\n- Preserve exact paths, commands, error strings, identifiers, and numeric facts when known.\n- Mention important omitted or truncated data explicitly.\n- Do not mention that you are an AI or explain the compaction process.\n\n# Conversation History To Compact\n{}",
437        prepared.history_excerpt
438    )
439}
440
441fn tail_start_index(messages: &[ChatMessage], policy: CompactionPolicy) -> Option<usize> {
442    let mut user_turns = 0usize;
443    let mut start = None;
444    for (idx, msg) in messages.iter().enumerate().rev() {
445        if msg.role == MessageRole::User {
446            user_turns += 1;
447            start = Some(idx);
448            if user_turns >= policy.tail_turns {
449                break;
450            }
451        }
452    }
453    let mut start = start?;
454    while estimate_messages_tokens(&messages[start..]) > policy.tail_token_budget {
455        let next_user = messages
456            .iter()
457            .enumerate()
458            .skip(start + 1)
459            .find(|(_, msg)| msg.role == MessageRole::User)
460            .map(|(idx, _)| idx);
461        match next_user {
462            Some(idx) => start = idx,
463            None => break,
464        }
465    }
466    Some(start)
467}
468
469fn format_history_excerpt(messages: &[ChatMessage], policy: CompactionPolicy) -> String {
470    let mut out = String::new();
471    for (idx, msg) in messages.iter().enumerate() {
472        let role = match msg.role {
473            MessageRole::User => "USER",
474            MessageRole::Assistant => "ASSISTANT",
475            MessageRole::System => "SYSTEM",
476            MessageRole::Tool => "TOOL",
477        };
478        out.push_str(&format!("\n\n--- MESSAGE {} [{}] ---\n", idx + 1, role));
479        if msg.kind != ChatMessageKind::Normal {
480            out.push_str(&format!("kind: {:?}\n", msg.kind));
481        }
482        if let Some(name) = &msg.tool_name {
483            out.push_str(&format!("tool_name: {}\n", name));
484        }
485        if let Some(id) = &msg.tool_call_id {
486            out.push_str(&format!("tool_call_id: {}\n", id));
487        }
488        if let Some(calls) = &msg.tool_calls {
489            let names: Vec<&str> = calls
490                .iter()
491                .map(|call| call.function.name.as_str())
492                .collect();
493            out.push_str(&format!("tool_calls: {}\n", names.join(", ")));
494        }
495        if let Some(images) = &msg.images
496            && !images.is_empty()
497        {
498            out.push_str(&format!("[{} image attachment(s) omitted]\n", images.len()));
499        }
500        for action in &msg.actions {
501            out.push_str(&format!(
502                "action: {}({}) duration={:?}\n",
503                action.action_type, action.target, action.duration_seconds
504            ));
505            if let Some(metadata) = &action.metadata {
506                out.push_str(&format!("action_metadata: {:?}\n", metadata));
507            }
508        }
509        let cap = if msg.role == MessageRole::Tool {
510            policy.tool_output_max_chars
511        } else {
512            policy.tool_output_max_chars.saturating_mul(4)
513        };
514        out.push_str(&truncate_middle(&msg.content, cap));
515    }
516    out
517}
518
519fn estimate_message_tokens(msg: &ChatMessage) -> usize {
520    let mut chars = msg.content.len();
521    chars = chars.saturating_add(format!("{:?}", msg.role).len());
522    chars = chars.saturating_add(msg.tool_name.as_deref().map(str::len).unwrap_or(0));
523    chars = chars.saturating_add(msg.tool_call_id.as_deref().map(str::len).unwrap_or(0));
524    if let Some(images) = &msg.images {
525        chars = chars.saturating_add(images.iter().map(String::len).sum::<usize>());
526    }
527    chars.div_ceil(4)
528}
529
/// Shortens `text` to at most `max_chars` characters by cutting out the middle.
///
/// Text that already fits is returned unchanged. For limits below 128 only the
/// leading `max_chars` characters are kept. Otherwise the result is the head
/// and tail of the text joined by a truncation marker, with the marker's
/// length taken out of the budget and the tail getting the odd character.
fn truncate_middle(text: &str, max_chars: usize) -> String {
    const MARKER: &str = "\n\n[... truncated during context compaction ...]\n\n";

    let total_chars = text.chars().count();
    if total_chars <= max_chars {
        return text.to_owned();
    }

    // Byte offset of the n-th character, so slices always land on char boundaries.
    let byte_offset_of = |nth: usize| {
        text.char_indices()
            .nth(nth)
            .map_or(text.len(), |(offset, _)| offset)
    };

    if max_chars < 128 {
        return text[..byte_offset_of(max_chars)].to_owned();
    }

    let budget = max_chars.saturating_sub(MARKER.len());
    let head_chars = budget / 2;
    let tail_chars = budget - head_chars;

    let head = &text[..byte_offset_of(head_chars)];
    let tail = &text[byte_offset_of(total_chars - tail_chars)..];

    let mut shortened = String::with_capacity(head.len() + MARKER.len() + tail.len());
    shortened.push_str(head);
    shortened.push_str(MARKER);
    shortened.push_str(tail);
    shortened
}
552
/// Returns the text between the first `<summary>` tag and the first
/// `</summary>` tag that follows it, or `None` if either tag is missing.
fn extract_tagged_summary(text: &str) -> Option<&str> {
    let (_, after_open) = text.split_once("<summary>")?;
    let (inner, _) = after_open.split_once("</summary>")?;
    Some(inner)
}
560
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal chat request wrapping the given messages.
    fn request_with(messages: Vec<ChatMessage>) -> ChatRequest {
        ChatRequest {
            model_id: "ollama/test".to_string(),
            messages,
            system_prompt: "system".to_string(),
            instructions: None,
            reasoning: ReasoningLevel::Medium,
            temperature: 0.7,
            max_tokens: 4096,
            tools: Vec::new(),
        }
    }

    /// An estimate of 86 message tokens against a 100-token limit must be
    /// accepted for automatic compaction under the default policy.
    #[test]
    fn auto_compaction_triggers_by_percent() {
        let breakdown = super::super::state::PromptTokenBreakdown {
            system_tokens: 0,
            instructions_tokens: 0,
            message_tokens: 86,
            tool_schema_tokens: 0,
            image_count: 0,
            message_count: 2,
            tool_count: 0,
        };
        let snapshot = ContextUsageSnapshot::from_estimate(breakdown, Some(100));
        let req = request_with(vec![ChatMessage::user("hello")]);

        let decision = should_auto_compact(&snapshot, &req, CompactionPolicy::default());
        assert!(decision.is_ok());
    }

    /// Three user turns: the oldest turn is archived, the two newest are kept.
    #[test]
    fn prepare_preserves_recent_two_user_turns() {
        let history = vec![
            ChatMessage::user("one"),
            ChatMessage::assistant("one answer"),
            ChatMessage::user("two"),
            ChatMessage::assistant("two answer"),
            ChatMessage::user("three"),
        ];
        let request = CompactionRequest::manual(request_with(history), None);

        let prepared = prepare_compaction(&request, Some(100_000)).expect("prepared");

        assert_eq!(prepared.archived_messages.len(), 2);
        assert_eq!(prepared.preserved_messages.len(), 3);
        assert_eq!(prepared.preserved_messages[0].content, "two");
    }

    /// The replacement history is checkpoint, receipt, then preserved messages.
    #[test]
    fn replacement_starts_with_checkpoint_and_ack() {
        let record = CompactionRecord {
            id: "c1".to_string(),
            trigger: CompactionTrigger::Manual,
            created_at: Local::now(),
            before_tokens: 100,
            after_tokens: 25,
            archived_message_count: 1,
            preserved_message_count: 1,
            summary_tokens: 10,
            duration_secs: 1.0,
            focus: None,
            archive_path: None,
        };
        let prepared = PreparedCompaction {
            archived_messages: vec![ChatMessage::user("old")],
            preserved_messages: vec![ChatMessage::user("new")],
            previous_summary: None,
            history_excerpt: "old".to_string(),
        };

        let messages = build_replacement_messages("## Goal\n- continue", &prepared, &record);

        assert_eq!(messages[0].kind, ChatMessageKind::ContextCheckpoint);
        assert!(messages[0].content.contains(CHECKPOINT_MARKER));
        assert_eq!(messages[2].content, "new");
    }
}
638}