// koda_core/compact.rs
//! Session compaction — summarize old messages to reclaim context.
//!
//! When the conversation grows long, compaction replaces older messages with
//! a concise summary, freeing context window space for new work.
//!
//! ## How it works
//!
//! 1. **Trigger**: user types `/compact`, or auto-compact fires at ~80% context usage
//! 2. **Summarization**: the model generates a concise summary of the old messages
//! 3. **Replacement**: old messages are archived in the DB, replaced by the summary
//! 4. **Result**: context usage drops, conversation continues with full history awareness
//!
//! ## Auto-compaction
//!
//! When context usage exceeds the threshold (configurable, default ~80%),
//! compaction runs automatically before the next inference call. The user
//! sees a brief "⚡ Compacting..." indicator.
//!
//! ## What's preserved
//!
//! - Summary of all prior conversation and decisions
//! - Progress tracking entries (survive compaction via DB metadata)
//! - Memory facts (injected from `MEMORY.md`, not from conversation)
//! - File ownership state (tracked in SQLite, not in messages)
//!
//! Pure logic, zero UI dependencies. Returns structured results
//! for the caller (TUI or headless) to render however it likes.
//!
//! Within this module, compaction calls the provided model with reduced
//! settings (thinking/reasoning disabled, low temperature, capped output)
//! rather than switching to a separate summarizer model — summarization is
//! a simple task that doesn't need frontier-class reasoning.

use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};

use anyhow::{Result, bail};
use tokio::sync::RwLock;

use crate::config::ModelSettings;
use crate::db::Database;
use crate::persistence::Persistence;
use crate::providers::{ChatMessage, LlmProvider};

/// Minimum number of recent messages to keep verbatim during compaction.
pub const COMPACT_PRESERVE_COUNT: usize = 4;

/// Fraction of history to compact in partial mode (compact oldest half).
const PARTIAL_COMPACT_FRACTION: f64 = 0.5;

/// Below this message count, always do full compaction (partial is overhead).
/// See `compute_preserve_count` for how this threshold is applied.
const PARTIAL_COMPACT_THRESHOLD: usize = 12;

/// Circuit breaker: give up on auto-compaction once this many attempts in a
/// row have failed, so we stop burning an API call on every turn when
/// compaction is wedged (e.g. history too large for the model, persistent
/// API errors).
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Session-wide count of back-to-back compaction failures.
static CONSECUTIVE_FAILURES: AtomicU32 = AtomicU32::new(0);

/// Clear the consecutive-failure count.
///
/// Call after a successful compaction or when a new session starts.
pub fn reset_compact_failures() {
    CONSECUTIVE_FAILURES.store(0, Ordering::Relaxed);
}

/// Whether the breaker has tripped (auto-compaction should be skipped).
pub fn is_compact_circuit_broken() -> bool {
    let failures = CONSECUTIVE_FAILURES.load(Ordering::Relaxed);
    failures >= MAX_CONSECUTIVE_FAILURES
}

/// Note one more compaction failure.
///
/// Returns `true` exactly when this failure pushes the streak to the trip
/// threshold (or beyond).
pub fn record_compact_failure() -> bool {
    let count_now = CONSECUTIVE_FAILURES.fetch_add(1, Ordering::Relaxed) + 1;
    count_now >= MAX_CONSECUTIVE_FAILURES
}

/// Note a successful compaction — clears the failure streak.
fn record_compact_success() {
    reset_compact_failures();
}

/// Maximum number of head-truncation retries when history is too large.
/// Used by `truncate_until_fits`.
const MAX_TRUNCATION_RETRIES: usize = 3;

/// Fraction of messages to drop on each truncation attempt.
const TRUNCATION_DROP_FRACTION: f64 = 0.2;

/// Result of a successful compaction.
#[derive(Debug)]
pub struct CompactResult {
    /// Number of messages deleted from the database.
    pub deleted: usize,
    /// Estimated tokens in the summary (rough chars-based heuristic,
    /// not a tokenizer count).
    pub summary_tokens: usize,
}

/// Why compaction was skipped (not an error, just a precondition).
#[derive(Debug)]
pub enum CompactSkip {
    /// Session has unresolved tool calls — can't compact safely.
    PendingToolCalls,
    /// Session is too short to compact (contains N messages).
    TooShort(usize),
    /// History is too large for the current model to summarize without data loss.
    /// The user should switch to a model with a larger context window or start a new session.
    HistoryTooLarge,
}

107/// Attempt to compact a session.
108///
109/// Returns `Ok(Ok(result))` on success, `Ok(Err(skip))` if a
110/// precondition prevented compaction, or `Err(e)` on failure.
111pub async fn compact_session(
112    db: &Database,
113    session_id: &str,
114    max_context_tokens: usize,
115    model_settings: &crate::config::ModelSettings,
116    provider: &Arc<RwLock<Box<dyn LlmProvider>>>,
117) -> Result<std::result::Result<CompactResult, CompactSkip>> {
118    let prov = provider.read().await;
119    compact_session_with_provider(db, session_id, max_context_tokens, model_settings, &**prov).await
120}
121
122/// Core compaction logic — accepts `&dyn LlmProvider` directly.
123///
124/// Uses partial compaction for longer sessions (≥12 messages): only the oldest
125/// half of messages are summarized and archived, preserving more recent context
126/// verbatim. Short sessions fall back to full compaction (keep last 4).
127///
128/// Used by the inference loop for pre-flight compaction (where we already
129/// have a `&dyn LlmProvider` and don't need the Arc<RwLock<>> wrapper).
130pub async fn compact_session_with_provider(
131    db: &Database,
132    session_id: &str,
133    max_context_tokens: usize,
134    model_settings: &crate::config::ModelSettings,
135    provider: &dyn LlmProvider,
136) -> Result<std::result::Result<CompactResult, CompactSkip>> {
137    // Check preconditions
138    if db.has_pending_tool_calls(session_id).await.unwrap_or(false) {
139        return Ok(Err(CompactSkip::PendingToolCalls));
140    }
141
142    let history = db.load_context(session_id).await?;
143
144    if history.len() < 4 {
145        return Ok(Err(CompactSkip::TooShort(history.len())));
146    }
147
148    // Decide how many messages to preserve (partial vs full compaction).
149    // Partial: compact the oldest half, keep the newest half.
150    // Full: compact everything except the last COMPACT_PRESERVE_COUNT.
151    let preserve_count = compute_preserve_count(history.len());
152
153    let compact_count = history.len().saturating_sub(preserve_count);
154    if compact_count == 0 {
155        return Ok(Err(CompactSkip::TooShort(history.len())));
156    }
157
158    // Only summarize the messages being compacted, not the ones we're keeping.
159    let to_compact = &history[..compact_count];
160    let conversation_text = build_conversation_text(to_compact);
161
162    tracing::info!(
163        "Compacting {compact_count}/{} messages (preserving {preserve_count})",
164        history.len(),
165    );
166
167    // Check if the conversation text fits in the current model's context.
168    // Reserve 4096 tokens for the summary output + overhead.
169    let text_tokens = (conversation_text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN)
170        as usize
171        + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
172    let available = max_context_tokens.saturating_sub(4096);
173
174    // If history fits, use it as-is. Otherwise, progressively truncate
175    // the oldest messages until it fits (up to MAX_TRUNCATION_RETRIES).
176    let final_text = if text_tokens <= available {
177        conversation_text
178    } else {
179        match truncate_until_fits(to_compact, available) {
180            Some(text) => text,
181            None => return Ok(Err(CompactSkip::HistoryTooLarge)),
182        }
183    };
184
185    let summary_prompt = build_summary_prompt(&final_text);
186
187    let messages = vec![ChatMessage::text("user", &summary_prompt)];
188    // Use reduced settings for compaction on the SAME model/provider.
189    // The capacity check above guarantees the conversation text fits.
190    // Savings come from disabling thinking/reasoning, not switching models.
191    let compact_settings = ModelSettings {
192        model: model_settings.model.clone(),
193        max_tokens: Some(4096),
194        temperature: Some(0.3),
195        thinking_budget: None,
196        reasoning_effort: None,
197        max_context_tokens: model_settings.max_context_tokens,
198    };
199    let response = provider.chat(&messages, &[], &compact_settings).await?;
200
201    let summary = match response.content {
202        Some(text) if !text.trim().is_empty() => text,
203        _ => bail!("LLM returned an empty summary"),
204    };
205
206    let summary = strip_analysis_block(&summary);
207    let compact_message = format!("[Compacted conversation summary]\n\n{summary}");
208    let deleted = db
209        .compact_session(session_id, &compact_message, preserve_count)
210        .await?;
211
212    record_compact_success();
213
214    Ok(Ok(CompactResult {
215        deleted,
216        summary_tokens: summary.len() / 4,
217    }))
218}
219
220/// Compute how many messages to preserve during compaction.
221///
222/// - Short sessions (<12 messages): full compaction, keep last 4.
223/// - Longer sessions: partial compaction, keep the newest ~50%.
224///
225/// This preserves more recent context verbatim in long sessions,
226/// producing a smaller, more focused summary of just the oldest half.
227fn compute_preserve_count(total: usize) -> usize {
228    if total < PARTIAL_COMPACT_THRESHOLD {
229        COMPACT_PRESERVE_COUNT
230    } else {
231        let keep = (total as f64 * (1.0 - PARTIAL_COMPACT_FRACTION)).ceil() as usize;
232        keep.max(COMPACT_PRESERVE_COUNT)
233    }
234}
235
/// Build the 9-section summarization prompt.
///
/// Adapted from CC's compaction prompt. Uses an `<analysis>` scratchpad block
/// (stripped before storing) that demonstrably improves summary quality.
///
/// The conversation text is appended after a `---` separator at the end of
/// the instructions.
fn build_summary_prompt(conversation_text: &str) -> String {
    // NOTE: the trailing `\n\` on each line is a Rust string continuation —
    // it emits a literal newline, then strips the source newline and the
    // next line's leading indentation from the resulting string.
    format!(
        "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.\n\
         Tool calls will be REJECTED and will waste your only turn.\n\
         Your entire response must be plain text: an <analysis> block followed by a <summary> block.\n\
         \n\
         Your task is to create a detailed summary of the conversation so far, paying close \n\
         attention to the user's explicit requests and your previous actions.\n\
         This summary should be thorough in capturing technical details, code patterns, and \n\
         architectural decisions that would be essential for continuing development work \n\
         without losing context.\n\
         \n\
         Before providing your final summary, wrap your analysis in <analysis> tags to \n\
         organize your thoughts and ensure you've covered all necessary points. In your analysis:\n\
         \n\
         1. Chronologically analyze each message. For each section thoroughly identify:\n\
            - The user's explicit requests and intents\n\
            - Your approach to addressing them\n\
            - Key decisions, technical concepts and code patterns\n\
            - Specific details: file names, code snippets, function signatures, file edits\n\
            - Errors encountered and how they were fixed\n\
            - Specific user feedback, especially corrections\n\
         2. Double-check for technical accuracy and completeness.\n\
         \n\
         Your summary should include these sections:\n\
         \n\
         1. **Primary Request and Intent**: Capture ALL of the user's explicit requests in detail.\n\
         2. **Key Technical Concepts**: List all important technologies and frameworks discussed.\n\
         3. **Files and Code Sections**: Enumerate specific files examined, modified, or created. \n\
            Include code snippets where applicable and a summary of why each file matters.\n\
            **Be exhaustive about file paths** — once compaction runs, the only record of\n\
            files touched in the compacted range is this summary, so missing a path means\n\
            losing it for the rest of the session. Group as: created / modified / deleted.\n\
         4. **Errors and Fixes**: List all errors and how they were resolved. Note user feedback.\n\
         5. **Problem Solving**: Document problems solved and ongoing troubleshooting.\n\
         6. **All User Messages**: List ALL user messages (not tool results). Critical for \n\
            preserving feedback and changing intent.\n\
         7. **Pending Tasks**: Outline anything unfinished or deferred.\n\
            **Preserve every outstanding TodoWrite item verbatim** with its current status\n\
            (pending / in_progress). Compaction is the only mechanism that defends plan\n\
            continuity across context-window pressure (`DESIGN.md § Progress Tracking:\n\
            Model-Owned, History-Persisted, Engine-Surfaced`) — the system prompt does NOT\n\
            re-inject the todo list, so anything dropped here is gone.\n\
         8. **Current Work**: Describe precisely what was being worked on immediately before \n\
            this summary. Include file names and code snippets.\n\
         9. **Optional Next Step**: Only if directly in line with the user's most recent \n\
            explicit request. Include direct quotes from the conversation to prevent drift.\n\
         \n\
         Format your response as:\n\
         \n\
         <analysis>\n\
         [Your thought process ensuring all points are covered]\n\
         </analysis>\n\
         \n\
         <summary>\n\
         1. Primary Request and Intent:\n\
            [Detailed description]\n\
         ...\n\
         </summary>\n\
         \n\
         REMINDER: Do NOT call any tools. Respond with plain text only.\n\
         \n\
         ---\n\n{conversation_text}"
    )
}

/// Strip the `<analysis>` scratchpad block from the summary.
///
/// The analysis block improves summary quality (model "thinks" before writing)
/// but has no informational value once the summary is written. Stripping it
/// saves tokens in the ongoing context.
///
/// Closing tags are searched only *after* their opening tag. The previous
/// version searched `</summary>` from the start of the string, so a stray
/// closing tag appearing before the opening tag produced an inverted range
/// and panicked on the slice `stripped[content_start..end]`.
///
/// # Examples
///
/// ```
/// use koda_core::compact::strip_analysis_block;
///
/// let input = "<analysis>\nthinking...\n</analysis>\n\n<summary>\nThe result.\n</summary>";
/// let result = strip_analysis_block(input);
/// assert_eq!(result, "The result.");
///
/// // Plain text without tags passes through unchanged:
/// assert_eq!(strip_analysis_block("just text"), "just text");
/// ```
pub fn strip_analysis_block(summary: &str) -> String {
    // Remove <analysis>...</analysis> including the tags. The closing tag
    // is searched only after the opening tag so a stray earlier `</analysis>`
    // can't produce an inverted splice.
    let stripped = match summary.find("<analysis>") {
        Some(start) => match summary[start..].find("</analysis>") {
            Some(rel_end) => {
                let after = start + rel_end + "</analysis>".len();
                format!("{}{}", &summary[..start], &summary[after..])
            }
            // Unclosed tag: leave the text alone rather than guess.
            None => summary.to_string(),
        },
        None => summary.to_string(),
    };

    // Extract content from <summary> tags if present (closing tag searched
    // after the opening tag — see doc comment).
    let stripped = match stripped.find("<summary>") {
        Some(start) => {
            let content_start = start + "<summary>".len();
            match stripped[content_start..].find("</summary>") {
                Some(rel_end) => stripped[content_start..content_start + rel_end]
                    .trim()
                    .to_string(),
                None => stripped,
            }
        }
        None => stripped,
    };

    // Clean up: collapse runs of blank lines, trim outer whitespace.
    let mut result = String::new();
    let mut prev_empty = false;
    for line in stripped.lines() {
        let is_empty = line.trim().is_empty();
        if is_empty && prev_empty {
            continue;
        }
        if !result.is_empty() {
            result.push('\n');
        }
        result.push_str(line);
        prev_empty = is_empty;
    }
    result.trim().to_string()
}

366/// Progressively drop oldest messages until the conversation text fits
367/// in the available token budget. Keeps at least `COMPACT_PRESERVE_COUNT`
368/// recent messages. Returns `None` if it can't fit after max retries.
369fn truncate_until_fits(history: &[crate::db::Message], available_tokens: usize) -> Option<String> {
370    let total = history.len();
371    // Minimum messages to keep: the preserved tail + at least 1 to summarize
372    let min_keep = COMPACT_PRESERVE_COUNT + 1;
373    if total <= min_keep {
374        return None;
375    }
376
377    let mut drop_count = 0usize;
378    for attempt in 0..MAX_TRUNCATION_RETRIES {
379        // Drop 20% of remaining summarizable messages each attempt
380        let summarizable = total.saturating_sub(drop_count);
381        let to_drop = (summarizable as f64 * TRUNCATION_DROP_FRACTION).ceil() as usize;
382        drop_count += to_drop.max(1); // always drop at least 1
383
384        // Never drop so many that we have fewer than min_keep
385        if total.saturating_sub(drop_count) < min_keep {
386            drop_count = total - min_keep;
387        }
388
389        let truncated = &history[drop_count..];
390        let text = build_conversation_text(truncated);
391        let text_tokens = (text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN) as usize
392            + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
393
394        tracing::info!(
395            "Truncation attempt {}: dropped {drop_count}/{total} messages, \
396             ~{text_tokens} tokens (budget: {available_tokens})",
397            attempt + 1,
398        );
399
400        if text_tokens <= available_tokens {
401            return Some(text);
402        }
403    }
404
405    None
406}
407
408/// Format conversation history into a single string for the summarizer.
409///
410/// Per-message content is truncated to 2000 chars (individual tool outputs
411/// can be huge but add little summarization value beyond a preview).
412/// No total cap — the capacity check in `compact_session_with_provider`
413/// guarantees the result fits in the model's context window.
414fn build_conversation_text(history: &[crate::db::Message]) -> String {
415    let mut text = String::new();
416    for msg in history {
417        let role = msg.role.as_str();
418        if let Some(ref content) = msg.content {
419            let truncated: String = content.chars().take(2000).collect();
420            text.push_str(&format!("[{role}]: {truncated}\n\n"));
421        }
422        if let Some(ref tool_calls) = msg.tool_calls {
423            let truncated: String = tool_calls.chars().take(500).collect();
424            text.push_str(&format!("[{role} tool_calls]: {truncated}\n\n"));
425        }
426    }
427    text
428}
429
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::Message;

    // Minimal `Message` fixture — only `role`, `content`, and `tool_calls`
    // matter to the pure helpers under test; everything else is zeroed.
    fn make_msg(role: &str, content: Option<&str>, tool_calls: Option<&str>) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role: role.parse().unwrap_or(crate::db::Role::User),
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: None,
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_circuit_breaker() {
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());

        assert!(!record_compact_failure()); // 1st
        assert!(!is_compact_circuit_broken());

        assert!(!record_compact_failure()); // 2nd
        assert!(!is_compact_circuit_broken());

        assert!(record_compact_failure()); // 3rd — trips
        assert!(is_compact_circuit_broken());

        // Reset should untrip
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_empty_history() {
        assert_eq!(build_conversation_text(&[]), "");
    }

    #[test]
    fn test_basic_conversation() {
        let msgs = vec![
            make_msg("user", Some("hello"), None),
            make_msg("assistant", Some("hi"), None),
        ];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("[user]: hello"));
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_truncates_long_content_per_message() {
        let long = "x".repeat(3000);
        let msgs = vec![make_msg("user", Some(&long), None)];
        let text = build_conversation_text(&msgs);
        // Each msg content capped at 2000 chars
        assert!(text.len() < 2100);
    }

    #[test]
    fn test_no_total_cap() {
        // 50 messages × 500 chars each = 25K chars — no cap applied
        let content = "y".repeat(500);
        let msgs: Vec<_> = (0..50)
            .map(|_| make_msg("user", Some(&content), None))
            .collect();
        let text = build_conversation_text(&msgs);
        // All 50 messages should be included (no 20K cap)
        assert!(text.len() > 20_000);
        assert!(!text.contains("truncated"));
    }

    #[test]
    fn test_multibyte_boundary_safe() {
        // Put emoji right at the 2000-char boundary
        let mut content = "a".repeat(1999);
        content.push('\u{1f43b}'); // bear emoji (4 bytes)
        content.push_str("after");
        let msgs = vec![make_msg("user", Some(&content), None)];
        let text = build_conversation_text(&msgs);
        // Should not panic on char boundary
        assert!(text.contains("\u{1f43b}") || !text.contains("after"));
    }

    #[test]
    fn test_tool_calls_included() {
        let msgs = vec![make_msg("assistant", None, Some("{\"name\": \"Read\"}"))];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("tool_calls"));
        assert!(text.contains("Read"));
    }

    #[test]
    fn test_none_content_skipped() {
        let msgs = vec![make_msg("tool", None, None)];
        let text = build_conversation_text(&msgs);
        assert_eq!(text, "");
    }

    #[test]
    fn test_strip_analysis_block() {
        let input = "<analysis>\nthinking here\n</analysis>\n\n<summary>\n1. Primary Request:\n   Build a thing\n</summary>";
        let result = strip_analysis_block(input);
        assert!(result.contains("Primary Request"));
        assert!(!result.contains("<analysis>"));
        assert!(!result.contains("thinking here"));
        assert!(!result.contains("<summary>"));
    }

    #[test]
    fn test_strip_analysis_no_tags() {
        let input = "Just a plain summary";
        assert_eq!(strip_analysis_block(input), "Just a plain summary");
    }

    #[test]
    fn test_strip_analysis_only_summary_tags() {
        let input = "<summary>\nThe good stuff\n</summary>";
        let result = strip_analysis_block(input);
        assert_eq!(result, "The good stuff");
    }

    #[test]
    fn test_truncate_until_fits_drops_oldest() {
        // 20 messages, each ~50 chars
        let msgs: Vec<_> = (0..20)
            .map(|i| {
                make_msg(
                    "user",
                    Some(&format!("Message number {i} with some padding text here")),
                    None,
                )
            })
            .collect();

        // Budget: fits ~half the messages but not all 20
        // 20 msgs × ~50 chars / 3.5 ≈ 286 tokens + 100 overhead ≈ 386
        // Want to force truncation: set budget to ~250 tokens
        let result = truncate_until_fits(&msgs, 250);
        assert!(result.is_some(), "should succeed after truncation");
        let text = result.unwrap();
        // Should contain the last messages but not the first
        assert!(text.contains("Message number 19"));
        assert!(!text.contains("Message number 0"));
    }

    #[test]
    fn test_truncate_until_fits_too_few_messages() {
        // Only COMPACT_PRESERVE_COUNT + 1 = 5 messages, can't drop any
        let msgs: Vec<_> = (0..5)
            .map(|_| make_msg("user", Some(&"x".repeat(10_000)), None))
            .collect();
        // Tiny budget that can't fit even 5 messages
        let result = truncate_until_fits(&msgs, 10);
        assert!(result.is_none());
    }

    #[test]
    fn test_truncate_until_fits_already_fits() {
        let msgs: Vec<_> = (0..10)
            .map(|i| make_msg("user", Some(&format!("Short {i}")), None))
            .collect();
        // Huge budget
        let result = truncate_until_fits(&msgs, 100_000);
        assert!(result.is_some());
        let text = result.unwrap();
        // First attempt drops 20% but still fits, so it drops
        // We just check it returns something valid
        assert!(text.contains("Short 9"));
    }

    #[test]
    fn test_compute_preserve_count_short_sessions() {
        // Below threshold: always keep COMPACT_PRESERVE_COUNT (4)
        assert_eq!(compute_preserve_count(4), 4);
        assert_eq!(compute_preserve_count(8), 4);
        assert_eq!(compute_preserve_count(11), 4);
    }

    #[test]
    fn test_compute_preserve_count_partial() {
        // At threshold (12): keep ceil(12 * 0.5) = 6
        assert_eq!(compute_preserve_count(12), 6);
        // 20 messages: keep ceil(20 * 0.5) = 10
        assert_eq!(compute_preserve_count(20), 10);
        // 50 messages: keep 25
        assert_eq!(compute_preserve_count(50), 25);
        // 100 messages: keep 50
        assert_eq!(compute_preserve_count(100), 50);
    }

    #[test]
    fn test_compute_preserve_count_never_below_minimum() {
        // Even at threshold, result must be >= COMPACT_PRESERVE_COUNT
        for n in 0..200 {
            assert!(compute_preserve_count(n) >= COMPACT_PRESERVE_COUNT);
        }
    }

    // ── build_summary_prompt ────────────────────────────────────────────

    #[test]
    fn test_build_summary_prompt_embeds_conversation() {
        let text = build_summary_prompt("[user]: hello\n\n[assistant]: hi\n\n");
        assert!(
            text.contains("[user]: hello"),
            "prompt should embed the conversation text verbatim"
        );
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_build_summary_prompt_instructs_no_tool_calls() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("Do NOT call any tools"),
            "prompt must forbid tool calls"
        );
        assert!(text.contains("CRITICAL"));
    }

    #[test]
    fn test_build_summary_prompt_requests_analysis_and_summary_tags() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("<analysis>"),
            "prompt should ask for <analysis> block"
        );
        assert!(
            text.contains("<summary>"),
            "prompt should ask for <summary> block"
        );
    }

    // ── build_conversation_text edge cases ────────────────────────────

    #[test]
    fn test_build_conversation_text_tool_calls_truncated_at_500() {
        let long_tc = "T".repeat(600);
        let msgs = vec![make_msg("assistant", None, Some(&long_tc))];
        let text = build_conversation_text(&msgs);
        // 500 char cap on tool_calls
        assert!(
            text.len() <= 550,
            "tool_calls should be capped at 500 chars"
        );
    }

    #[test]
    fn test_build_conversation_text_both_content_and_tool_calls() {
        let msgs = vec![make_msg(
            "assistant",
            Some("I will read the file"),
            Some("{\"name\": \"Read\"}"),
        )];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("I will read the file"));
        assert!(text.contains("tool_calls"));
    }

    // ── strip_analysis_block edge cases ──────────────────────────────

    #[test]
    fn test_strip_analysis_unclosed_tag_passthrough() {
        // If <analysis> has no closing tag, leave the text alone.
        let input = "<analysis>\nthinking...\n1. Primary Request: build a thing";
        let result = strip_analysis_block(input);
        assert!(
            result.contains("thinking"),
            "unclosed analysis tag should leave text intact"
        );
    }

    #[test]
    fn test_strip_analysis_trims_extra_whitespace() {
        let input = "<analysis>\nthink\n</analysis>\n\n\n\n<summary>\nClean content\n</summary>";
        let result = strip_analysis_block(input);
        // Collapsed blank lines, no leading/trailing whitespace
        assert!(!result.starts_with('\n'));
        assert!(!result.ends_with('\n'));
        assert_eq!(result, "Clean content");
    }

    // ── circuit breaker ───────────────────────────────────────────────────────
    // Covered by test_circuit_breaker above. Removed duplicate tests
    // that raced on the global CONSECUTIVE_FAILURES AtomicU32 when
    // running in parallel.
}