1use chrono::{DateTime, Local};
10use serde::{Deserialize, Serialize};
11
12use crate::constants::{
13 COMPACTION_AUTO_THRESHOLD_PERCENT, COMPACTION_MAX_RESPONSE_RESERVE_TOKENS,
14 COMPACTION_MIN_RESPONSE_RESERVE_TOKENS, COMPACTION_SUMMARIZER_INPUT_TOKEN_BUDGET,
15 COMPACTION_SUMMARY_MAX_TOKENS, COMPACTION_TAIL_TOKEN_BUDGET, COMPACTION_TAIL_TURNS,
16 COMPACTION_TOOL_OUTPUT_MAX_CHARS,
17};
18use crate::models::{ChatMessage, ChatMessageKind, MessageRole, ReasoningLevel, TokenUsage};
19
20use super::cmd::ChatRequest;
21use super::state::ContextUsageSnapshot;
22
/// Heading text that `build_replacement_messages` writes at the top of every
/// checkpoint message. `prepare_compaction` also searches archived message
/// content for this text to locate an earlier checkpoint.
const CHECKPOINT_MARKER: &str = "MERMAID CONTEXT CHECKPOINT";
24
/// Identifies what initiated a compaction run.
///
/// Serialized through serde with snake_case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompactionTrigger {
    /// Set by [`CompactionRequest::manual`].
    Manual,
    /// NOTE(review): presumably used when `should_auto_compact` passes — confirm against callers.
    AutoThreshold,
    /// NOTE(review): presumably used when a request is retried after hitting the
    /// model's context limit — confirm against callers.
    ContextLimitRetry,
}
32
33impl CompactionTrigger {
34 pub fn as_str(self) -> &'static str {
35 match self {
36 Self::Manual => "manual",
37 Self::AutoThreshold => "auto_threshold",
38 Self::ContextLimitRetry => "context_limit_retry",
39 }
40 }
41
42 pub fn label(self) -> &'static str {
43 match self {
44 Self::Manual => "manual",
45 Self::AutoThreshold => "automatic",
46 Self::ContextLimitRetry => "context-limit retry",
47 }
48 }
49}
50
/// Tunable limits that control when compaction runs and how much history it keeps.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CompactionPolicy {
    /// When `false`, `should_auto_compact` returns `CompactionSkip::AutoDisabled`.
    pub auto_enabled: bool,
    /// `should_auto_compact` passes once the snapshot's used percent is at or above this value.
    pub auto_threshold_percent: u8,
    /// Number of most recent user-role messages `tail_start_index` walks back over
    /// when choosing where the preserved tail begins.
    pub tail_turns: usize,
    /// Estimated-token budget for the preserved tail; `tail_start_index` moves the
    /// tail start forward to later user messages while the tail exceeds it.
    pub tail_token_budget: usize,
    /// Character cap applied to tool-role message content in the history excerpt;
    /// other roles are capped at four times this value.
    pub tool_output_max_chars: usize,
    /// `max_tokens` used for the summary and verification requests.
    pub summary_max_tokens: usize,
    /// Upper bound on the input-token budget `prepare_compaction` uses to size the
    /// history excerpt.
    pub summarizer_input_token_budget: usize,
    /// Lower bound applied by [`CompactionPolicy::response_reserve`].
    pub min_response_reserve_tokens: usize,
    /// Upper bound applied by [`CompactionPolicy::response_reserve`].
    pub max_response_reserve_tokens: usize,
}
63
64impl Default for CompactionPolicy {
65 fn default() -> Self {
66 Self {
67 auto_enabled: true,
68 auto_threshold_percent: COMPACTION_AUTO_THRESHOLD_PERCENT,
69 tail_turns: COMPACTION_TAIL_TURNS,
70 tail_token_budget: COMPACTION_TAIL_TOKEN_BUDGET,
71 tool_output_max_chars: COMPACTION_TOOL_OUTPUT_MAX_CHARS,
72 summary_max_tokens: COMPACTION_SUMMARY_MAX_TOKENS,
73 summarizer_input_token_budget: COMPACTION_SUMMARIZER_INPUT_TOKEN_BUDGET,
74 min_response_reserve_tokens: COMPACTION_MIN_RESPONSE_RESERVE_TOKENS,
75 max_response_reserve_tokens: COMPACTION_MAX_RESPONSE_RESERVE_TOKENS,
76 }
77 }
78}
79
80impl CompactionPolicy {
81 pub fn response_reserve(self, request_max_tokens: usize) -> usize {
82 request_max_tokens
83 .max(self.min_response_reserve_tokens)
84 .min(self.max_response_reserve_tokens)
85 }
86}
87
/// Everything needed to run one compaction pass over a chat request.
#[derive(Debug, Clone)]
pub struct CompactionRequest {
    /// The chat request whose `messages` are compacted.
    pub chat: ChatRequest,
    /// What initiated this compaction.
    pub trigger: CompactionTrigger,
    /// Optional user-supplied text; `None` for requests built by [`CompactionRequest::auto`].
    /// NOTE(review): presumably forwarded as the summary "focus" — confirm against callers.
    pub instructions: Option<String>,
    /// `true` for manual requests, `false` for automatic ones.
    /// NOTE(review): not read in this file; presumably bypasses threshold checks — confirm.
    pub force: bool,
    /// Limits applied while preparing the compaction.
    pub policy: CompactionPolicy,
}
96
97impl CompactionRequest {
98 pub fn manual(chat: ChatRequest, instructions: Option<String>) -> Self {
99 Self {
100 chat,
101 trigger: CompactionTrigger::Manual,
102 instructions,
103 force: true,
104 policy: CompactionPolicy::default(),
105 }
106 }
107
108 pub fn auto(chat: ChatRequest, trigger: CompactionTrigger) -> Self {
109 Self {
110 chat,
111 trigger,
112 instructions: None,
113 force: false,
114 policy: CompactionPolicy::default(),
115 }
116 }
117}
118
/// Serializable bookkeeping entry describing one completed compaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionRecord {
    /// Compaction identifier; written into the checkpoint text and metadata.
    pub id: String,
    /// What initiated the compaction.
    pub trigger: CompactionTrigger,
    /// Creation time; rendered as RFC 3339 in the checkpoint text.
    pub created_at: DateTime<Local>,
    /// Token count before compaction, as shown in the receipt.
    pub before_tokens: usize,
    /// Token count after compaction, as shown in the receipt.
    pub after_tokens: usize,
    /// Number of messages moved out of the live conversation.
    pub archived_message_count: usize,
    /// Number of messages kept verbatim after the checkpoint.
    pub preserved_message_count: usize,
    /// NOTE(review): not read in this file; presumably the size of the generated summary — confirm.
    pub summary_tokens: usize,
    /// Duration in seconds; the receipt prints it with one decimal place.
    pub duration_secs: f64,
    /// Optional focus text; defaults to `None` when absent from serialized data.
    #[serde(default)]
    pub focus: Option<String>,
    /// Optional archive location; defaults to `None` when absent from serialized data.
    /// NOTE(review): presumably where the matching `CompactionArchive` was stored — confirm.
    #[serde(default)]
    pub archive_path: Option<String>,
}
135
/// Serializable container for a set of chat messages tied to a conversation.
///
/// NOTE(review): not constructed in this file; presumably holds the messages a
/// compaction removed from the live conversation — confirm against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionArchive {
    /// Archive identifier. NOTE(review): presumably matches `CompactionRecord::id` — confirm.
    pub id: String,
    /// Identifier of the conversation the messages belong to.
    pub conversation_id: String,
    /// Time the archive was created.
    pub created_at: DateTime<Local>,
    /// The stored messages.
    pub messages: Vec<ChatMessage>,
}
143
/// Bundle of outputs describing one compaction run.
///
/// NOTE(review): not constructed in this file; field notes below are inferred
/// from names and types — confirm against the code that builds it.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// Bookkeeping entry for the run.
    pub record: CompactionRecord,
    /// Presumably the output of `build_replacement_messages` — TODO confirm.
    pub replacement_messages: Vec<ChatMessage>,
    /// Presumably the messages removed from the live conversation — TODO confirm.
    pub archived_messages: Vec<ChatMessage>,
    /// Context usage snapshot associated with the state before compaction.
    pub before_snapshot: ContextUsageSnapshot,
    /// Context usage snapshot associated with the state after compaction.
    pub after_snapshot: ContextUsageSnapshot,
    /// Token usage for the run, if known (see `combine_usage` for merging two usages).
    pub usage: Option<TokenUsage>,
}
153
/// Output of `prepare_compaction`: the conversation split into an archived head
/// and a preserved tail, plus the text handed to the summarizer.
#[derive(Debug, Clone)]
pub struct PreparedCompaction {
    /// Messages before the split point; these are summarized.
    pub archived_messages: Vec<ChatMessage>,
    /// Messages from the split point onward; `build_replacement_messages` appends
    /// them unchanged after the checkpoint.
    pub preserved_messages: Vec<ChatMessage>,
    /// Content of an earlier checkpoint found among the archived messages, if any.
    pub previous_summary: Option<String>,
    /// Formatted and size-limited rendering of the archived messages, embedded in
    /// the summary prompt.
    pub history_excerpt: String,
}
161
/// Reasons a compaction is not performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompactionSkip {
    /// The snapshot has no usable maximum context size.
    NoKnownContextLimit,
    /// The policy has automatic compaction turned off.
    AutoDisabled,
    /// Neither the usage percentage nor the remaining tokens reached the trigger point.
    BelowThreshold,
    /// The conversation is too short, or no valid split point exists.
    NothingToCompact,
}

impl std::fmt::Display for CompactionSkip {
    /// Writes a short lowercase explanation of the skip reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let reason = match self {
            Self::NothingToCompact => "not enough history to compact",
            Self::BelowThreshold => "context is below compaction threshold",
            Self::AutoDisabled => "automatic compaction is disabled",
            Self::NoKnownContextLimit => "model context limit is unknown",
        };
        f.write_str(reason)
    }
}
180
181pub fn should_auto_compact(
182 snapshot: &ContextUsageSnapshot,
183 request: &ChatRequest,
184 policy: CompactionPolicy,
185) -> Result<(), CompactionSkip> {
186 if !policy.auto_enabled {
187 return Err(CompactionSkip::AutoDisabled);
188 }
189 let Some(max_tokens) = snapshot.max_tokens else {
190 return Err(CompactionSkip::NoKnownContextLimit);
191 };
192 if max_tokens == 0 {
193 return Err(CompactionSkip::NoKnownContextLimit);
194 }
195
196 let reserve = policy.response_reserve(request.max_tokens);
197 let over_percent = snapshot
198 .used_percent
199 .is_some_and(|p| p >= policy.auto_threshold_percent);
200 let low_remaining = snapshot
201 .remaining_tokens
202 .is_some_and(|remaining| remaining <= reserve);
203 if over_percent || low_remaining {
204 Ok(())
205 } else {
206 Err(CompactionSkip::BelowThreshold)
207 }
208}
209
210pub fn context_exceeds_hard_limit(
211 snapshot: &ContextUsageSnapshot,
212 request: &ChatRequest,
213 policy: CompactionPolicy,
214) -> bool {
215 let Some(max_tokens) = snapshot.max_tokens else {
216 return false;
217 };
218 let reserve = policy.response_reserve(request.max_tokens);
219 snapshot.used_tokens.saturating_add(reserve) >= max_tokens
220}
221
222pub fn prepare_compaction(
223 request: &CompactionRequest,
224 max_context_tokens: Option<usize>,
225) -> Result<PreparedCompaction, CompactionSkip> {
226 let messages = &request.chat.messages;
227 if messages.len() < 3 {
228 return Err(CompactionSkip::NothingToCompact);
229 }
230
231 let split =
232 tail_start_index(messages, request.policy).ok_or(CompactionSkip::NothingToCompact)?;
233 if split == 0 {
234 return Err(CompactionSkip::NothingToCompact);
235 }
236
237 let archived_messages = messages[..split].to_vec();
238 let preserved_messages = messages[split..].to_vec();
239 if archived_messages.is_empty() || preserved_messages.is_empty() {
240 return Err(CompactionSkip::NothingToCompact);
241 }
242
243 let previous_summary = archived_messages
244 .iter()
245 .rev()
246 .find(|m| {
247 m.kind == ChatMessageKind::ContextCheckpoint || m.content.contains(CHECKPOINT_MARKER)
248 })
249 .map(|m| m.content.clone());
250
251 let max_input_tokens = max_context_tokens
252 .map(|max| max.saturating_sub(request.policy.response_reserve(request.chat.max_tokens)))
253 .filter(|max| *max > 0)
254 .unwrap_or(request.policy.summarizer_input_token_budget)
255 .min(request.policy.summarizer_input_token_budget);
256 let max_chars = max_input_tokens.saturating_mul(4).max(4_000);
257 let history_excerpt = truncate_middle(
258 &format_history_excerpt(&archived_messages, request.policy),
259 max_chars,
260 );
261
262 Ok(PreparedCompaction {
263 archived_messages,
264 preserved_messages,
265 previous_summary,
266 history_excerpt,
267 })
268}
269
270pub fn build_summary_request(
271 base: &ChatRequest,
272 prepared: &PreparedCompaction,
273 focus: Option<&str>,
274 policy: CompactionPolicy,
275) -> ChatRequest {
276 ChatRequest {
277 model_id: base.model_id.clone(),
278 messages: vec![ChatMessage::user(summary_prompt(prepared, focus))],
279 system_prompt: compaction_system_prompt().to_string(),
280 instructions: None,
281 reasoning: compaction_reasoning(base.reasoning),
282 temperature: 0.0,
283 max_tokens: policy.summary_max_tokens,
284 tools: Vec::new(),
285 }
286}
287
288pub fn build_verification_request(
289 base: &ChatRequest,
290 prepared: &PreparedCompaction,
291 draft_summary: &str,
292 focus: Option<&str>,
293 policy: CompactionPolicy,
294) -> ChatRequest {
295 let prompt = format!(
296 "{}\n\n# Draft Summary\n{}\n\n# Verification Task\nCritically check the draft against the conversation excerpt. If it omitted specific file paths, commands, test results, tool results, user constraints, current state, or next steps, return an improved complete checkpoint. Otherwise return the draft unchanged. Return only the final checkpoint markdown.",
297 summary_prompt(prepared, focus),
298 draft_summary.trim()
299 );
300 ChatRequest {
301 model_id: base.model_id.clone(),
302 messages: vec![ChatMessage::user(prompt)],
303 system_prompt: compaction_system_prompt().to_string(),
304 instructions: None,
305 reasoning: compaction_reasoning(base.reasoning),
306 temperature: 0.0,
307 max_tokens: policy.summary_max_tokens,
308 tools: Vec::new(),
309 }
310}
311
312pub fn build_replacement_messages(
313 summary: &str,
314 prepared: &PreparedCompaction,
315 record: &CompactionRecord,
316) -> Vec<ChatMessage> {
317 let checkpoint = format!(
318 "# {}\n\nCompaction id: {}\nTrigger: {}\nCreated: {}\nArchived messages: {}\nPreserved messages: {}\n\n{}",
319 CHECKPOINT_MARKER,
320 record.id,
321 record.trigger.as_str(),
322 record.created_at.to_rfc3339(),
323 record.archived_message_count,
324 record.preserved_message_count,
325 summary.trim()
326 );
327 let mut user = ChatMessage::user(checkpoint);
328 user.kind = ChatMessageKind::ContextCheckpoint;
329 user.metadata = Some(serde_json::json!({
330 "compaction_id": record.id,
331 "trigger": record.trigger.as_str(),
332 "before_tokens": record.before_tokens,
333 "after_tokens": record.after_tokens,
334 "archived_message_count": record.archived_message_count,
335 "preserved_message_count": record.preserved_message_count,
336 "duration_secs": record.duration_secs,
337 }));
338
339 let mut assistant = ChatMessage::assistant(compaction_receipt(record));
340 assistant.kind = ChatMessageKind::ContextCheckpoint;
341 assistant.metadata = user.metadata.clone();
342
343 let mut messages = Vec::with_capacity(2 + prepared.preserved_messages.len());
344 messages.push(user);
345 messages.push(assistant);
346 messages.extend(prepared.preserved_messages.clone());
347 messages
348}
349
350pub fn compaction_receipt(record: &CompactionRecord) -> String {
351 format!(
352 "Context compacted: {} -> {} tokens, archived {} messages, preserved {} messages, took {:.1}s. I will continue from this checkpoint.",
353 format_compact_count(record.before_tokens),
354 format_compact_count(record.after_tokens),
355 record.archived_message_count,
356 record.preserved_message_count,
357 record.duration_secs
358 )
359}
360
361pub fn normalize_summary(text: &str) -> String {
362 let trimmed = text.trim();
363 if let Some(summary) = extract_tagged_summary(trimmed) {
364 return summary.trim().to_string();
365 }
366 trimmed.to_string()
367}
368
369pub fn combine_usage(a: Option<TokenUsage>, b: Option<TokenUsage>) -> Option<TokenUsage> {
370 match (a, b) {
371 (None, None) => None,
372 (Some(u), None) | (None, Some(u)) => Some(u),
373 (Some(mut left), Some(right)) => {
374 left.prompt_tokens = left.prompt_tokens.saturating_add(right.prompt_tokens);
375 left.completion_tokens = left
376 .completion_tokens
377 .saturating_add(right.completion_tokens);
378 left.total_tokens = left.total_tokens.saturating_add(right.total_tokens);
379 left.cached_input_tokens = left
380 .cached_input_tokens
381 .saturating_add(right.cached_input_tokens);
382 left.cache_creation_input_tokens = left
383 .cache_creation_input_tokens
384 .saturating_add(right.cache_creation_input_tokens);
385 left.reasoning_output_tokens = left
386 .reasoning_output_tokens
387 .saturating_add(right.reasoning_output_tokens);
388 Some(left)
389 },
390 }
391}
392
393pub fn estimate_messages_tokens(messages: &[ChatMessage]) -> usize {
394 messages.iter().map(estimate_message_tokens).sum()
395}
396
/// Formats a count compactly for display: plain digits below 1,000, one
/// decimal with a `k` suffix for thousands, and one decimal with an `M` suffix
/// for millions.
///
/// Values that would round up to `1000.0k` (999,950 and above) are shown in
/// millions instead, so the output is `1.0M` rather than `1000.0k`.
pub fn format_compact_count(value: usize) -> String {
    // Smallest value whose one-decimal "k" rendering would round to 1000.0.
    const MILLIONS_FLOOR: usize = 999_950;

    if value >= MILLIONS_FLOOR {
        format!("{:.1}M", value as f64 / 1_000_000.0)
    } else if value >= 1_000 {
        format!("{:.1}k", value as f64 / 1_000.0)
    } else {
        value.to_string()
    }
}
406
/// Returns the fixed system prompt used for the summary and verification requests.
fn compaction_system_prompt() -> &'static str {
    const SYSTEM_PROMPT: &str = "You are performing context checkpoint compaction for Mermaid, a model-agnostic agentic coding CLI. Produce a faithful handoff summary for the next model call. Preserve exact file paths, commands, errors, tool results, user preferences, decisions, current state, and next steps. Do not invent facts. Be concise but complete.";
    SYSTEM_PROMPT
}
410
411fn compaction_reasoning(current: ReasoningLevel) -> ReasoningLevel {
412 match current {
413 ReasoningLevel::None | ReasoningLevel::Minimal => current,
414 _ => ReasoningLevel::Low,
415 }
416}
417
418fn summary_prompt(prepared: &PreparedCompaction, focus: Option<&str>) -> String {
419 let anchor = prepared
420 .previous_summary
421 .as_deref()
422 .map(|summary| {
423 format!(
424 "A previous checkpoint exists. Update it with the newer history, preserve still-true details, and remove stale details.\n\n<previous_checkpoint>\n{}\n</previous_checkpoint>",
425 summary.trim()
426 )
427 })
428 .unwrap_or_else(|| "Create a new checkpoint from the conversation history below.".to_string());
429
430 let focus = focus
431 .filter(|s| !s.trim().is_empty())
432 .map(|s| format!("\n# User Focus Instructions\n{}\n", s.trim()))
433 .unwrap_or_default();
434
435 format!(
436 "{anchor}{focus}\n# Required Output\nReturn exactly this Markdown structure and keep section order:\n\n## Goal\n- [single-sentence task summary]\n\n## User Preferences And Constraints\n- [preferences, constraints, mode, or \"(none)\"]\n\n## Project State\n- [repo/product state and important architecture facts]\n\n## Completed Work\n- [what has already been done]\n\n## Current Work\n- [what is actively in progress]\n\n## Key Decisions\n- [decision and rationale]\n\n## Critical Files And Symbols\n- [file path or symbol: why it matters]\n\n## Commands Tests And Results\n- [command/test/result/error]\n\n## Open Questions Or Risks\n- [risk/question/blocker]\n\n## Next Steps\n- [ordered next action]\n\nRules:\n- Preserve exact paths, commands, error strings, identifiers, and numeric facts when known.\n- Mention important omitted or truncated data explicitly.\n- Do not mention that you are an AI or explain the compaction process.\n\n# Conversation History To Compact\n{}",
437 prepared.history_excerpt
438 )
439}
440
441fn tail_start_index(messages: &[ChatMessage], policy: CompactionPolicy) -> Option<usize> {
442 let mut user_turns = 0usize;
443 let mut start = None;
444 for (idx, msg) in messages.iter().enumerate().rev() {
445 if msg.role == MessageRole::User {
446 user_turns += 1;
447 start = Some(idx);
448 if user_turns >= policy.tail_turns {
449 break;
450 }
451 }
452 }
453 let mut start = start?;
454 while estimate_messages_tokens(&messages[start..]) > policy.tail_token_budget {
455 let next_user = messages
456 .iter()
457 .enumerate()
458 .skip(start + 1)
459 .find(|(_, msg)| msg.role == MessageRole::User)
460 .map(|(idx, _)| idx);
461 match next_user {
462 Some(idx) => start = idx,
463 None => break,
464 }
465 }
466 Some(start)
467}
468
469fn format_history_excerpt(messages: &[ChatMessage], policy: CompactionPolicy) -> String {
470 let mut out = String::new();
471 for (idx, msg) in messages.iter().enumerate() {
472 let role = match msg.role {
473 MessageRole::User => "USER",
474 MessageRole::Assistant => "ASSISTANT",
475 MessageRole::System => "SYSTEM",
476 MessageRole::Tool => "TOOL",
477 };
478 out.push_str(&format!("\n\n--- MESSAGE {} [{}] ---\n", idx + 1, role));
479 if msg.kind != ChatMessageKind::Normal {
480 out.push_str(&format!("kind: {:?}\n", msg.kind));
481 }
482 if let Some(name) = &msg.tool_name {
483 out.push_str(&format!("tool_name: {}\n", name));
484 }
485 if let Some(id) = &msg.tool_call_id {
486 out.push_str(&format!("tool_call_id: {}\n", id));
487 }
488 if let Some(calls) = &msg.tool_calls {
489 let names: Vec<&str> = calls
490 .iter()
491 .map(|call| call.function.name.as_str())
492 .collect();
493 out.push_str(&format!("tool_calls: {}\n", names.join(", ")));
494 }
495 if let Some(images) = &msg.images
496 && !images.is_empty()
497 {
498 out.push_str(&format!("[{} image attachment(s) omitted]\n", images.len()));
499 }
500 for action in &msg.actions {
501 out.push_str(&format!(
502 "action: {}({}) duration={:?}\n",
503 action.action_type, action.target, action.duration_seconds
504 ));
505 if let Some(metadata) = &action.metadata {
506 out.push_str(&format!("action_metadata: {:?}\n", metadata));
507 }
508 }
509 let cap = if msg.role == MessageRole::Tool {
510 policy.tool_output_max_chars
511 } else {
512 policy.tool_output_max_chars.saturating_mul(4)
513 };
514 out.push_str(&truncate_middle(&msg.content, cap));
515 }
516 out
517}
518
519fn estimate_message_tokens(msg: &ChatMessage) -> usize {
520 let mut chars = msg.content.len();
521 chars = chars.saturating_add(format!("{:?}", msg.role).len());
522 chars = chars.saturating_add(msg.tool_name.as_deref().map(str::len).unwrap_or(0));
523 chars = chars.saturating_add(msg.tool_call_id.as_deref().map(str::len).unwrap_or(0));
524 if let Some(images) = &msg.images {
525 chars = chars.saturating_add(images.iter().map(String::len).sum::<usize>());
526 }
527 chars.div_ceil(4)
528}
529
/// Shortens `text` to at most `max_chars` characters by removing its middle.
///
/// - Text that already fits is returned unchanged.
/// - When `max_chars` is below 128, the text is cut to its first `max_chars`
///   characters with no marker.
/// - Otherwise the head and tail are kept around a truncation marker so the
///   result is exactly `max_chars` characters long; the tail gets the extra
///   character when the remaining space is odd.
///
/// Lengths are measured in characters, not bytes.
fn truncate_middle(text: &str, max_chars: usize) -> String {
    const MARKER: &str = "\n\n[... truncated during context compaction ...]\n\n";

    // Byte offset of the character at index `n`, or the end of the text.
    let byte_offset = |n: usize| text.char_indices().nth(n).map_or(text.len(), |(at, _)| at);

    let total_chars = text.chars().count();
    if total_chars <= max_chars {
        return text.to_owned();
    }
    if max_chars < 128 {
        return text[..byte_offset(max_chars)].to_owned();
    }

    // The marker is ASCII, so its byte length equals its character count.
    let keep = max_chars.saturating_sub(MARKER.len());
    let head_chars = keep / 2;
    let tail_chars = keep.saturating_sub(head_chars);

    let head = &text[..byte_offset(head_chars)];
    let tail = &text[byte_offset(total_chars - tail_chars)..];

    let mut shortened = String::with_capacity(head.len() + MARKER.len() + tail.len());
    shortened.push_str(head);
    shortened.push_str(MARKER);
    shortened.push_str(tail);
    shortened
}
552
/// Returns the text between the first `<summary>` tag and the first
/// `</summary>` tag that follows it, or `None` when either tag is missing.
///
/// The returned slice is not trimmed.
fn extract_tagged_summary(text: &str) -> Option<&str> {
    let (_, after_open) = text.split_once("<summary>")?;
    let (inner, _) = after_open.split_once("</summary>")?;
    Some(inner)
}
560
/// Unit tests for the compaction helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal `ChatRequest` around `messages` with fixed test settings.
    fn request_with(messages: Vec<ChatMessage>) -> ChatRequest {
        ChatRequest {
            model_id: "ollama/test".to_string(),
            messages,
            system_prompt: "system".to_string(),
            instructions: None,
            reasoning: ReasoningLevel::Medium,
            temperature: 0.7,
            max_tokens: 4096,
            tools: Vec::new(),
        }
    }

    /// An estimate of 86 message tokens against a 100-token limit must pass the
    /// automatic-compaction check under the default policy.
    #[test]
    fn auto_compaction_triggers_by_percent() {
        let snapshot = ContextUsageSnapshot::from_estimate(
            super::super::state::PromptTokenBreakdown {
                system_tokens: 0,
                instructions_tokens: 0,
                message_tokens: 86,
                tool_schema_tokens: 0,
                image_count: 0,
                message_count: 2,
                tool_count: 0,
            },
            Some(100),
        );
        let req = request_with(vec![ChatMessage::user("hello")]);
        assert!(should_auto_compact(&snapshot, &req, CompactionPolicy::default()).is_ok());
    }

    /// With five alternating messages, the first user/assistant pair is archived
    /// and the tail starting at the second user message ("two") is preserved.
    #[test]
    fn prepare_preserves_recent_two_user_turns() {
        let messages = vec![
            ChatMessage::user("one"),
            ChatMessage::assistant("one answer"),
            ChatMessage::user("two"),
            ChatMessage::assistant("two answer"),
            ChatMessage::user("three"),
        ];
        let request = CompactionRequest::manual(request_with(messages), None);
        let prepared = prepare_compaction(&request, Some(100_000)).expect("prepared");
        assert_eq!(prepared.archived_messages.len(), 2);
        assert_eq!(prepared.preserved_messages.len(), 3);
        assert_eq!(prepared.preserved_messages[0].content, "two");
    }

    /// The replacement list begins with the checkpoint message (index 0) and the
    /// receipt (index 1), so the first preserved message lands at index 2.
    #[test]
    fn replacement_starts_with_checkpoint_and_ack() {
        let prepared = PreparedCompaction {
            archived_messages: vec![ChatMessage::user("old")],
            preserved_messages: vec![ChatMessage::user("new")],
            previous_summary: None,
            history_excerpt: "old".to_string(),
        };
        let record = CompactionRecord {
            id: "c1".to_string(),
            trigger: CompactionTrigger::Manual,
            created_at: Local::now(),
            before_tokens: 100,
            after_tokens: 25,
            archived_message_count: 1,
            preserved_message_count: 1,
            summary_tokens: 10,
            duration_secs: 1.0,
            focus: None,
            archive_path: None,
        };
        let messages = build_replacement_messages("## Goal\n- continue", &prepared, &record);
        assert_eq!(messages[0].kind, ChatMessageKind::ContextCheckpoint);
        assert!(messages[0].content.contains(CHECKPOINT_MARKER));
        assert_eq!(messages[2].content, "new");
    }
}