// koda_core/compact.rs
//! Session compaction — summarize old messages to reclaim context.
//!
//! When the conversation grows long, compaction replaces older messages with
//! a concise summary, freeing context window space for new work.
//!
//! ## How it works
//!
//! 1. **Trigger**: user types `/compact`, or auto-compact fires at ~80% context usage
//! 2. **Summarization**: a cheap model (Standard tier) generates a summary of old messages
//! 3. **Replacement**: old messages are archived in the DB, replaced by the summary
//! 4. **Result**: context usage drops, conversation continues with full history awareness
//!
//! ## Auto-compaction
//!
//! When context usage exceeds the threshold (configurable, default ~80%),
//! compaction runs automatically before the next inference call. The user
//! sees a brief "⚡ Compacting..." indicator.
//!
//! ## What's preserved
//!
//! - Summary of all prior conversation and decisions
//! - Progress tracking entries (survive compaction via DB metadata)
//! - Memory facts (injected from `MEMORY.md`, not from conversation)
//! - File ownership state (tracked in SQLite, not in messages)
//!
//! Pure logic, zero UI dependencies. Returns structured results
//! for the caller (TUI or headless) to render however it likes.
//!
//! Compaction uses a cheap model (Standard tier) when available,
//! falling back to the main model. Summarization is a simple task
//! that doesn't need frontier-class reasoning.

33use crate::config::ModelSettings;
34use crate::db::Database;
35use crate::persistence::Persistence;
36use crate::providers::{ChatMessage, LlmProvider};
37use anyhow::{Result, bail};
38use std::sync::Arc;
39use std::sync::atomic::{AtomicU32, Ordering};
40use tokio::sync::RwLock;
41
42/// Minimum number of recent messages to keep verbatim during compaction.
43pub const COMPACT_PRESERVE_COUNT: usize = 4;
44
45/// Fraction of history to compact in partial mode (compact oldest half).
46const PARTIAL_COMPACT_FRACTION: f64 = 0.5;
47
48/// Below this message count, always do full compaction (partial is overhead).
49const PARTIAL_COMPACT_THRESHOLD: usize = 12;
50
51/// Stop auto-compacting after this many consecutive failures.
52/// Prevents wasting an API call every turn when compaction is stuck
53/// (e.g. history too large for the model, persistent API errors).
54const MAX_CONSECUTIVE_FAILURES: u32 = 3;
55
56/// Global consecutive failure counter. Shared across the session.
57static CONSECUTIVE_FAILURES: AtomicU32 = AtomicU32::new(0);
58
59/// Reset the failure counter (call after successful compaction or new session).
60pub fn reset_compact_failures() {
61    CONSECUTIVE_FAILURES.store(0, Ordering::Relaxed);
62}
63
64/// Check if the circuit breaker is tripped.
65pub fn is_compact_circuit_broken() -> bool {
66    CONSECUTIVE_FAILURES.load(Ordering::Relaxed) >= MAX_CONSECUTIVE_FAILURES
67}
68
69/// Record a compaction failure. Returns true if the circuit breaker just tripped.
70pub fn record_compact_failure() -> bool {
71    let prev = CONSECUTIVE_FAILURES.fetch_add(1, Ordering::Relaxed);
72    prev + 1 >= MAX_CONSECUTIVE_FAILURES
73}
74
75/// Record a compaction success — resets the failure counter.
76fn record_compact_success() {
77    reset_compact_failures();
78}
79
80/// Maximum number of head-truncation retries when history is too large.
81const MAX_TRUNCATION_RETRIES: usize = 3;
82
83/// Fraction of messages to drop on each truncation attempt.
84const TRUNCATION_DROP_FRACTION: f64 = 0.2;
85
86/// Result of a successful compaction.
87#[derive(Debug)]
88pub struct CompactResult {
89    /// Number of messages deleted from the database.
90    pub deleted: usize,
91    /// Estimated tokens in the summary.
92    pub summary_tokens: usize,
93}
94
95/// Why compaction was skipped (not an error, just a precondition).
96#[derive(Debug)]
97pub enum CompactSkip {
98    /// Session has unresolved tool calls — can't compact safely.
99    PendingToolCalls,
100    /// Session is too short to compact (contains N messages).
101    TooShort(usize),
102    /// History is too large for the current model to summarize without data loss.
103    /// The user should switch to a model with a larger context window or start a new session.
104    HistoryTooLarge,
105}
106
107/// Attempt to compact a session.
108///
109/// Returns `Ok(Ok(result))` on success, `Ok(Err(skip))` if a
110/// precondition prevented compaction, or `Err(e)` on failure.
111pub async fn compact_session(
112    db: &Database,
113    session_id: &str,
114    max_context_tokens: usize,
115    model_settings: &crate::config::ModelSettings,
116    provider: &Arc<RwLock<Box<dyn LlmProvider>>>,
117) -> Result<std::result::Result<CompactResult, CompactSkip>> {
118    let prov = provider.read().await;
119    compact_session_with_provider(db, session_id, max_context_tokens, model_settings, &**prov).await
120}
121
122/// Core compaction logic — accepts `&dyn LlmProvider` directly.
123///
124/// Uses partial compaction for longer sessions (≥12 messages): only the oldest
125/// half of messages are summarized and archived, preserving more recent context
126/// verbatim. Short sessions fall back to full compaction (keep last 4).
127///
128/// Used by the inference loop for pre-flight compaction (where we already
129/// have a `&dyn LlmProvider` and don't need the Arc<RwLock<>> wrapper).
130pub async fn compact_session_with_provider(
131    db: &Database,
132    session_id: &str,
133    max_context_tokens: usize,
134    model_settings: &crate::config::ModelSettings,
135    provider: &dyn LlmProvider,
136) -> Result<std::result::Result<CompactResult, CompactSkip>> {
137    // Check preconditions
138    if db.has_pending_tool_calls(session_id).await.unwrap_or(false) {
139        return Ok(Err(CompactSkip::PendingToolCalls));
140    }
141
142    let history = db.load_context(session_id).await?;
143
144    if history.len() < 4 {
145        return Ok(Err(CompactSkip::TooShort(history.len())));
146    }
147
148    // Decide how many messages to preserve (partial vs full compaction).
149    // Partial: compact the oldest half, keep the newest half.
150    // Full: compact everything except the last COMPACT_PRESERVE_COUNT.
151    let preserve_count = compute_preserve_count(history.len());
152
153    let compact_count = history.len().saturating_sub(preserve_count);
154    if compact_count == 0 {
155        return Ok(Err(CompactSkip::TooShort(history.len())));
156    }
157
158    // Only summarize the messages being compacted, not the ones we're keeping.
159    let to_compact = &history[..compact_count];
160    let conversation_text = build_conversation_text(to_compact);
161
162    tracing::info!(
163        "Compacting {compact_count}/{} messages (preserving {preserve_count})",
164        history.len(),
165    );
166
167    // Check if the conversation text fits in the current model's context.
168    // Reserve 4096 tokens for the summary output + overhead.
169    let text_tokens = (conversation_text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN)
170        as usize
171        + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
172    let available = max_context_tokens.saturating_sub(4096);
173
174    // If history fits, use it as-is. Otherwise, progressively truncate
175    // the oldest messages until it fits (up to MAX_TRUNCATION_RETRIES).
176    let final_text = if text_tokens <= available {
177        conversation_text
178    } else {
179        match truncate_until_fits(to_compact, available) {
180            Some(text) => text,
181            None => return Ok(Err(CompactSkip::HistoryTooLarge)),
182        }
183    };
184
185    let summary_prompt = build_summary_prompt(&final_text);
186
187    let messages = vec![ChatMessage::text("user", &summary_prompt)];
188    // Use reduced settings for compaction on the SAME model/provider.
189    // The capacity check above guarantees the conversation text fits.
190    // Savings come from disabling thinking/reasoning, not switching models.
191    let compact_settings = ModelSettings {
192        model: model_settings.model.clone(),
193        max_tokens: Some(4096),
194        temperature: Some(0.3),
195        thinking_budget: None,
196        reasoning_effort: None,
197        max_context_tokens: model_settings.max_context_tokens,
198    };
199    let response = provider.chat(&messages, &[], &compact_settings).await?;
200
201    let summary = match response.content {
202        Some(text) if !text.trim().is_empty() => text,
203        _ => bail!("LLM returned an empty summary"),
204    };
205
206    let summary = strip_analysis_block(&summary);
207    let compact_message = format!("[Compacted conversation summary]\n\n{summary}");
208    let deleted = db
209        .compact_session(session_id, &compact_message, preserve_count)
210        .await?;
211
212    record_compact_success();
213
214    Ok(Ok(CompactResult {
215        deleted,
216        summary_tokens: summary.len() / 4,
217    }))
218}
219
220/// Compute how many messages to preserve during compaction.
221///
222/// - Short sessions (<12 messages): full compaction, keep last 4.
223/// - Longer sessions: partial compaction, keep the newest ~50%.
224///
225/// This preserves more recent context verbatim in long sessions,
226/// producing a smaller, more focused summary of just the oldest half.
227fn compute_preserve_count(total: usize) -> usize {
228    if total < PARTIAL_COMPACT_THRESHOLD {
229        COMPACT_PRESERVE_COUNT
230    } else {
231        let keep = (total as f64 * (1.0 - PARTIAL_COMPACT_FRACTION)).ceil() as usize;
232        keep.max(COMPACT_PRESERVE_COUNT)
233    }
234}
235
236/// Build the 9-section summarization prompt.
237///
238/// Adapted from CC's compaction prompt. Uses an `<analysis>` scratchpad block
239/// (stripped before storing) that demonstrably improves summary quality.
240fn build_summary_prompt(conversation_text: &str) -> String {
241    format!(
242        "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.\n\
243         Tool calls will be REJECTED and will waste your only turn.\n\
244         Your entire response must be plain text: an <analysis> block followed by a <summary> block.\n\
245         \n\
246         Your task is to create a detailed summary of the conversation so far, paying close \n\
247         attention to the user's explicit requests and your previous actions.\n\
248         This summary should be thorough in capturing technical details, code patterns, and \n\
249         architectural decisions that would be essential for continuing development work \n\
250         without losing context.\n\
251         \n\
252         Before providing your final summary, wrap your analysis in <analysis> tags to \n\
253         organize your thoughts and ensure you've covered all necessary points. In your analysis:\n\
254         \n\
255         1. Chronologically analyze each message. For each section thoroughly identify:\n\
256            - The user's explicit requests and intents\n\
257            - Your approach to addressing them\n\
258            - Key decisions, technical concepts and code patterns\n\
259            - Specific details: file names, code snippets, function signatures, file edits\n\
260            - Errors encountered and how they were fixed\n\
261            - Specific user feedback, especially corrections\n\
262         2. Double-check for technical accuracy and completeness.\n\
263         \n\
264         Your summary should include these sections:\n\
265         \n\
266         1. **Primary Request and Intent**: Capture ALL of the user's explicit requests in detail.\n\
267         2. **Key Technical Concepts**: List all important technologies and frameworks discussed.\n\
268         3. **Files and Code Sections**: Enumerate specific files examined, modified, or created. \n\
269            Include code snippets where applicable and a summary of why each file matters.\n\
270         4. **Errors and Fixes**: List all errors and how they were resolved. Note user feedback.\n\
271         5. **Problem Solving**: Document problems solved and ongoing troubleshooting.\n\
272         6. **All User Messages**: List ALL user messages (not tool results). Critical for \n\
273            preserving feedback and changing intent.\n\
274         7. **Pending Tasks**: Outline anything unfinished or deferred.\n\
275         8. **Current Work**: Describe precisely what was being worked on immediately before \n\
276            this summary. Include file names and code snippets.\n\
277         9. **Optional Next Step**: Only if directly in line with the user's most recent \n\
278            explicit request. Include direct quotes from the conversation to prevent drift.\n\
279         \n\
280         Format your response as:\n\
281         \n\
282         <analysis>\n\
283         [Your thought process ensuring all points are covered]\n\
284         </analysis>\n\
285         \n\
286         <summary>\n\
287         1. Primary Request and Intent:\n\
288            [Detailed description]\n\
289         ...\n\
290         </summary>\n\
291         \n\
292         REMINDER: Do NOT call any tools. Respond with plain text only.\n\
293         \n\
294         ---\n\n{conversation_text}"
295    )
296}
297
298/// Strip the `<analysis>` scratchpad block from the summary.
299///
300/// The analysis block improves summary quality (model "thinks" before writing)
301/// but has no informational value once the summary is written. Stripping it
302/// saves tokens in the ongoing context.
303///
304/// # Examples
305///
306/// ```
307/// use koda_core::compact::strip_analysis_block;
308///
309/// let input = "<analysis>\nthinking...\n</analysis>\n\n<summary>\nThe result.\n</summary>";
310/// let result = strip_analysis_block(input);
311/// assert_eq!(result, "The result.");
312///
313/// // Plain text without tags passes through unchanged:
314/// assert_eq!(strip_analysis_block("just text"), "just text");
315/// ```
316pub fn strip_analysis_block(summary: &str) -> String {
317    // Remove <analysis>...</analysis> including the tags
318    let stripped = if let Some(start) = summary.find("<analysis>") {
319        if let Some(end) = summary.find("</analysis>") {
320            let after = end + "</analysis>".len();
321            format!("{}{}", &summary[..start], &summary[after..])
322        } else {
323            summary.to_string()
324        }
325    } else {
326        summary.to_string()
327    };
328
329    // Extract content from <summary> tags if present
330    let stripped = if let Some(start) = stripped.find("<summary>") {
331        if let Some(end) = stripped.find("</summary>") {
332            let content_start = start + "<summary>".len();
333            stripped[content_start..end].trim().to_string()
334        } else {
335            stripped
336        }
337    } else {
338        stripped
339    };
340
341    // Clean up extra whitespace
342    let mut result = String::new();
343    let mut prev_empty = false;
344    for line in stripped.lines() {
345        let is_empty = line.trim().is_empty();
346        if is_empty && prev_empty {
347            continue;
348        }
349        if !result.is_empty() {
350            result.push('\n');
351        }
352        result.push_str(line);
353        prev_empty = is_empty;
354    }
355    result.trim().to_string()
356}
357
358/// Progressively drop oldest messages until the conversation text fits
359/// in the available token budget. Keeps at least `COMPACT_PRESERVE_COUNT`
360/// recent messages. Returns `None` if it can't fit after max retries.
361fn truncate_until_fits(history: &[crate::db::Message], available_tokens: usize) -> Option<String> {
362    let total = history.len();
363    // Minimum messages to keep: the preserved tail + at least 1 to summarize
364    let min_keep = COMPACT_PRESERVE_COUNT + 1;
365    if total <= min_keep {
366        return None;
367    }
368
369    let mut drop_count = 0usize;
370    for attempt in 0..MAX_TRUNCATION_RETRIES {
371        // Drop 20% of remaining summarizable messages each attempt
372        let summarizable = total.saturating_sub(drop_count);
373        let to_drop = (summarizable as f64 * TRUNCATION_DROP_FRACTION).ceil() as usize;
374        drop_count += to_drop.max(1); // always drop at least 1
375
376        // Never drop so many that we have fewer than min_keep
377        if total.saturating_sub(drop_count) < min_keep {
378            drop_count = total - min_keep;
379        }
380
381        let truncated = &history[drop_count..];
382        let text = build_conversation_text(truncated);
383        let text_tokens = (text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN) as usize
384            + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
385
386        tracing::info!(
387            "Truncation attempt {}: dropped {drop_count}/{total} messages, \
388             ~{text_tokens} tokens (budget: {available_tokens})",
389            attempt + 1,
390        );
391
392        if text_tokens <= available_tokens {
393            return Some(text);
394        }
395    }
396
397    None
398}
399
400/// Format conversation history into a single string for the summarizer.
401///
402/// Per-message content is truncated to 2000 chars (individual tool outputs
403/// can be huge but add little summarization value beyond a preview).
404/// No total cap — the capacity check in `compact_session_with_provider`
405/// guarantees the result fits in the model's context window.
406fn build_conversation_text(history: &[crate::db::Message]) -> String {
407    let mut text = String::new();
408    for msg in history {
409        let role = msg.role.as_str();
410        if let Some(ref content) = msg.content {
411            let truncated: String = content.chars().take(2000).collect();
412            text.push_str(&format!("[{role}]: {truncated}\n\n"));
413        }
414        if let Some(ref tool_calls) = msg.tool_calls {
415            let truncated: String = tool_calls.chars().take(500).collect();
416            text.push_str(&format!("[{role} tool_calls]: {truncated}\n\n"));
417        }
418    }
419    text
420}
421
422#[cfg(test)]
423mod tests {
424    use super::*;
425    use crate::db::Message;
426
427    fn make_msg(role: &str, content: Option<&str>, tool_calls: Option<&str>) -> Message {
428        Message {
429            id: 0,
430            session_id: String::new(),
431            role: role.parse().unwrap_or(crate::db::Role::User),
432            content: content.map(String::from),
433            full_content: None,
434            tool_calls: tool_calls.map(String::from),
435            tool_call_id: None,
436            prompt_tokens: None,
437            completion_tokens: None,
438            cache_read_tokens: None,
439            cache_creation_tokens: None,
440            thinking_tokens: None,
441            thinking_content: None,
442            created_at: None,
443        }
444    }
445
446    #[test]
447    fn test_circuit_breaker() {
448        reset_compact_failures();
449        assert!(!is_compact_circuit_broken());
450
451        assert!(!record_compact_failure()); // 1st
452        assert!(!is_compact_circuit_broken());
453
454        assert!(!record_compact_failure()); // 2nd
455        assert!(!is_compact_circuit_broken());
456
457        assert!(record_compact_failure()); // 3rd — trips
458        assert!(is_compact_circuit_broken());
459
460        // Reset should untrip
461        reset_compact_failures();
462        assert!(!is_compact_circuit_broken());
463    }
464
465    #[test]
466    fn test_empty_history() {
467        assert_eq!(build_conversation_text(&[]), "");
468    }
469
470    #[test]
471    fn test_basic_conversation() {
472        let msgs = vec![
473            make_msg("user", Some("hello"), None),
474            make_msg("assistant", Some("hi"), None),
475        ];
476        let text = build_conversation_text(&msgs);
477        assert!(text.contains("[user]: hello"));
478        assert!(text.contains("[assistant]: hi"));
479    }
480
481    #[test]
482    fn test_truncates_long_content_per_message() {
483        let long = "x".repeat(3000);
484        let msgs = vec![make_msg("user", Some(&long), None)];
485        let text = build_conversation_text(&msgs);
486        // Each msg content capped at 2000 chars
487        assert!(text.len() < 2100);
488    }
489
490    #[test]
491    fn test_no_total_cap() {
492        // 50 messages × 500 chars each = 25K chars — no cap applied
493        let content = "y".repeat(500);
494        let msgs: Vec<_> = (0..50)
495            .map(|_| make_msg("user", Some(&content), None))
496            .collect();
497        let text = build_conversation_text(&msgs);
498        // All 50 messages should be included (no 20K cap)
499        assert!(text.len() > 20_000);
500        assert!(!text.contains("truncated"));
501    }
502
503    #[test]
504    fn test_multibyte_boundary_safe() {
505        // Put emoji right at the 2000-char boundary
506        let mut content = "a".repeat(1999);
507        content.push('\u{1f43b}'); // bear emoji (4 bytes)
508        content.push_str("after");
509        let msgs = vec![make_msg("user", Some(&content), None)];
510        let text = build_conversation_text(&msgs);
511        // Should not panic on char boundary
512        assert!(text.contains("\u{1f43b}") || !text.contains("after"));
513    }
514
515    #[test]
516    fn test_tool_calls_included() {
517        let msgs = vec![make_msg("assistant", None, Some("{\"name\": \"Read\"}"))];
518        let text = build_conversation_text(&msgs);
519        assert!(text.contains("tool_calls"));
520        assert!(text.contains("Read"));
521    }
522
523    #[test]
524    fn test_none_content_skipped() {
525        let msgs = vec![make_msg("tool", None, None)];
526        let text = build_conversation_text(&msgs);
527        assert_eq!(text, "");
528    }
529
530    #[test]
531    fn test_strip_analysis_block() {
532        let input = "<analysis>\nthinking here\n</analysis>\n\n<summary>\n1. Primary Request:\n   Build a thing\n</summary>";
533        let result = strip_analysis_block(input);
534        assert!(result.contains("Primary Request"));
535        assert!(!result.contains("<analysis>"));
536        assert!(!result.contains("thinking here"));
537        assert!(!result.contains("<summary>"));
538    }
539
540    #[test]
541    fn test_strip_analysis_no_tags() {
542        let input = "Just a plain summary";
543        assert_eq!(strip_analysis_block(input), "Just a plain summary");
544    }
545
546    #[test]
547    fn test_strip_analysis_only_summary_tags() {
548        let input = "<summary>\nThe good stuff\n</summary>";
549        let result = strip_analysis_block(input);
550        assert_eq!(result, "The good stuff");
551    }
552
553    #[test]
554    fn test_truncate_until_fits_drops_oldest() {
555        // 20 messages, each ~50 chars
556        let msgs: Vec<_> = (0..20)
557            .map(|i| {
558                make_msg(
559                    "user",
560                    Some(&format!("Message number {i} with some padding text here")),
561                    None,
562                )
563            })
564            .collect();
565
566        // Budget: fits ~half the messages but not all 20
567        // 20 msgs × ~50 chars / 3.5 ≈ 286 tokens + 100 overhead ≈ 386
568        // Want to force truncation: set budget to ~250 tokens
569        let result = truncate_until_fits(&msgs, 250);
570        assert!(result.is_some(), "should succeed after truncation");
571        let text = result.unwrap();
572        // Should contain the last messages but not the first
573        assert!(text.contains("Message number 19"));
574        assert!(!text.contains("Message number 0"));
575    }
576
577    #[test]
578    fn test_truncate_until_fits_too_few_messages() {
579        // Only COMPACT_PRESERVE_COUNT + 1 = 5 messages, can't drop any
580        let msgs: Vec<_> = (0..5)
581            .map(|_| make_msg("user", Some(&"x".repeat(10_000)), None))
582            .collect();
583        // Tiny budget that can't fit even 5 messages
584        let result = truncate_until_fits(&msgs, 10);
585        assert!(result.is_none());
586    }
587
588    #[test]
589    fn test_truncate_until_fits_already_fits() {
590        let msgs: Vec<_> = (0..10)
591            .map(|i| make_msg("user", Some(&format!("Short {i}")), None))
592            .collect();
593        // Huge budget
594        let result = truncate_until_fits(&msgs, 100_000);
595        assert!(result.is_some());
596        let text = result.unwrap();
597        // First attempt drops 20% but still fits, so it drops
598        // We just check it returns something valid
599        assert!(text.contains("Short 9"));
600    }
601
602    #[test]
603    fn test_compute_preserve_count_short_sessions() {
604        // Below threshold: always keep COMPACT_PRESERVE_COUNT (4)
605        assert_eq!(compute_preserve_count(4), 4);
606        assert_eq!(compute_preserve_count(8), 4);
607        assert_eq!(compute_preserve_count(11), 4);
608    }
609
610    #[test]
611    fn test_compute_preserve_count_partial() {
612        // At threshold (12): keep ceil(12 * 0.5) = 6
613        assert_eq!(compute_preserve_count(12), 6);
614        // 20 messages: keep ceil(20 * 0.5) = 10
615        assert_eq!(compute_preserve_count(20), 10);
616        // 50 messages: keep 25
617        assert_eq!(compute_preserve_count(50), 25);
618        // 100 messages: keep 50
619        assert_eq!(compute_preserve_count(100), 50);
620    }
621
622    #[test]
623    fn test_compute_preserve_count_never_below_minimum() {
624        // Even at threshold, result must be >= COMPACT_PRESERVE_COUNT
625        for n in 0..200 {
626            assert!(compute_preserve_count(n) >= COMPACT_PRESERVE_COUNT);
627        }
628    }
629
630    // ── build_summary_prompt ────────────────────────────────────────────
631
632    #[test]
633    fn test_build_summary_prompt_embeds_conversation() {
634        let text = build_summary_prompt("[user]: hello\n\n[assistant]: hi\n\n");
635        assert!(
636            text.contains("[user]: hello"),
637            "prompt should embed the conversation text verbatim"
638        );
639        assert!(text.contains("[assistant]: hi"));
640    }
641
642    #[test]
643    fn test_build_summary_prompt_instructs_no_tool_calls() {
644        let text = build_summary_prompt("some conversation");
645        assert!(
646            text.contains("Do NOT call any tools"),
647            "prompt must forbid tool calls"
648        );
649        assert!(text.contains("CRITICAL"));
650    }
651
652    #[test]
653    fn test_build_summary_prompt_requests_analysis_and_summary_tags() {
654        let text = build_summary_prompt("some conversation");
655        assert!(
656            text.contains("<analysis>"),
657            "prompt should ask for <analysis> block"
658        );
659        assert!(
660            text.contains("<summary>"),
661            "prompt should ask for <summary> block"
662        );
663    }
664
665    // ── build_conversation_text edge cases ────────────────────────────
666
667    #[test]
668    fn test_build_conversation_text_tool_calls_truncated_at_500() {
669        let long_tc = "T".repeat(600);
670        let msgs = vec![make_msg("assistant", None, Some(&long_tc))];
671        let text = build_conversation_text(&msgs);
672        // 500 char cap on tool_calls
673        assert!(
674            text.len() <= 550,
675            "tool_calls should be capped at 500 chars"
676        );
677    }
678
679    #[test]
680    fn test_build_conversation_text_both_content_and_tool_calls() {
681        let msgs = vec![make_msg(
682            "assistant",
683            Some("I will read the file"),
684            Some("{\"name\": \"Read\"}"),
685        )];
686        let text = build_conversation_text(&msgs);
687        assert!(text.contains("I will read the file"));
688        assert!(text.contains("tool_calls"));
689    }
690
691    // ── strip_analysis_block edge cases ──────────────────────────────
692
693    #[test]
694    fn test_strip_analysis_unclosed_tag_passthrough() {
695        // If <analysis> has no closing tag, leave the text alone.
696        let input = "<analysis>\nthinking...\n1. Primary Request: build a thing";
697        let result = strip_analysis_block(input);
698        assert!(
699            result.contains("thinking"),
700            "unclosed analysis tag should leave text intact"
701        );
702    }
703
704    #[test]
705    fn test_strip_analysis_trims_extra_whitespace() {
706        let input = "<analysis>\nthink\n</analysis>\n\n\n\n<summary>\nClean content\n</summary>";
707        let result = strip_analysis_block(input);
708        // Collapsed blank lines, no leading/trailing whitespace
709        assert!(!result.starts_with('\n'));
710        assert!(!result.ends_with('\n'));
711        assert_eq!(result, "Clean content");
712    }
713
714    // ── circuit breaker ───────────────────────────────────────────────────────
715    // Covered by test_circuit_breaker above. Removed duplicate tests
716    // that raced on the global CONSECUTIVE_FAILURES AtomicU32 when
717    // running in parallel.
718}