Skip to main content

bamboo_compression/
compression_tooling.rs

1use crate::counter::{TiktokenTokenCounter, TokenCounter};
2use crate::limits::create_budget_for_model;
3use crate::{BudgetStrategy, TokenBudget};
4use bamboo_agent_core::MessagePhase;
5use bamboo_agent_core::{
6    CompressionEvent, CompressionTriggerType, ConversationSummary, Message, Session,
7};
8
9/// Checks if a message is part of a skill tool chain (load_skill / read_skill_resource).
10fn is_skill_tool_chain_message(message: &Message) -> bool {
11    message.tool_calls.as_ref().is_some_and(|calls| {
12        calls.iter().any(|call| {
13            matches!(
14                call.function.name.as_str(),
15                "load_skill" | "read_skill_resource"
16            )
17        })
18    })
19}
20use chrono::Utc;
21use std::collections::HashSet;
22
/// Structured reason why a compression plan could not be built.
///
/// Implements [`std::fmt::Display`] for human-readable diagnostics and
/// [`std::error::Error`] so it can be boxed or propagated with `?` alongside
/// other error types.
#[derive(Debug, Clone)]
pub enum CompressionPlanError {
    /// The exposure gate (threshold not reached) prevented building.
    ExposureGateNotMet {
        /// Context usage, in percent, measured when the gate was evaluated.
        usage_percent: f64,
        /// Configured trigger percentage taken from the budget.
        trigger_percent: u8,
    },
    /// No active messages in the session.
    NoActiveMessages,
    /// Not enough non-system messages to compress (need >=3).
    NotEnoughMessages {
        /// Number of active non-system messages that were found.
        non_system_count: usize,
    },
    /// Nothing to compress after anchor/keep splitting.
    NothingToCompress {
        /// Index (within the non-system messages) where the kept tail starts.
        anchor_index: usize,
        /// Number of active non-system messages that were considered.
        non_system_count: usize,
    },
}

impl std::fmt::Display for CompressionPlanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExposureGateNotMet {
                usage_percent,
                trigger_percent,
            } => write!(
                f,
                "compression threshold not reached (usage={:.1}%, trigger={}%)",
                usage_percent, trigger_percent
            ),
            Self::NoActiveMessages => write!(f, "no active messages to compress"),
            Self::NotEnoughMessages { non_system_count } => write!(
                f,
                "not enough non-system messages to compress ({}, need >=3)",
                non_system_count
            ),
            Self::NothingToCompress {
                anchor_index,
                non_system_count,
            } => write!(
                f,
                "nothing to compress after anchor/keep splitting (anchor_index={}, non_system={})",
                anchor_index, non_system_count
            ),
        }
    }
}

// No underlying source error exists for any variant, so the default trait
// methods are sufficient.
impl std::error::Error for CompressionPlanError {}
70
71/// Metadata about current context pressure, used to decide when compression
72/// should be requested by host-side control flow.
73#[derive(Debug, Clone)]
74pub struct ContextCompressionExposure {
75    pub budget: TokenBudget,
76    pub active_tokens: u32,
77    pub active_usage_percent: f64,
78    pub active_usage_percent_rounded: u8,
79    pub should_expose_tool: bool,
80}
81
82/// A compression plan describing which active historical messages should be
83/// archived and summarized.
84#[derive(Debug, Clone)]
85pub struct CompressionPlan {
86    pub compressed_message_ids: Vec<String>,
87    pub messages_to_summarize: Vec<Message>,
88    pub summary_tokens: u32,
89    pub summary_content: String,
90    pub active_usage_before_percent: f64,
91    pub active_usage_after_percent: f64,
92    pub trigger_percent: u8,
93    pub target_percent: u8,
94    pub segments_removed: usize,
95    pub trigger_type: CompressionTriggerType,
96    pub compression_ratio: f64,
97    pub model_used: Option<String>,
98    pub latency_ms: u64,
99}
100
/// Expresses `total_tokens` as a percentage of `context_window_tokens`.
///
/// A zero-sized context window yields `0.0` instead of dividing by zero.
pub fn context_window_usage_percent(total_tokens: u32, context_window_tokens: u32) -> f64 {
    match context_window_tokens {
        0 => 0.0,
        window => (f64::from(total_tokens) / f64::from(window)) * 100.0,
    }
}
107
/// Normalizes a configured trigger percentage to a usable `f64`.
///
/// Values in `1..=100` are returned unchanged; `0` and anything above `100`
/// fall back to `100.0`.
pub fn normalized_trigger_percent(trigger_percent: u8) -> f64 {
    if (1..=100).contains(&trigger_percent) {
        f64::from(trigger_percent)
    } else {
        100.0
    }
}
115
116/// Estimate whether context pressure has crossed the configured threshold for
117/// compression eligibility.
118pub fn estimate_context_compression_exposure(
119    session: &Session,
120    model_name: &str,
121    configured_budget: Option<&TokenBudget>,
122) -> ContextCompressionExposure {
123    let budget = configured_budget
124        .cloned()
125        .unwrap_or_else(|| create_budget_for_model(model_name, BudgetStrategy::default()));
126    let counter = TiktokenTokenCounter::default();
127    let active_messages = active_messages_for_budget(session);
128    let active_message_tokens = counter.count_messages(&active_messages);
129    let summary_tokens = session
130        .conversation_summary
131        .as_ref()
132        .map(|summary| counter.count_messages(&[compression_summary_message(&summary.content)]))
133        .unwrap_or(0);
134    let active_tokens = active_message_tokens.saturating_add(summary_tokens);
135    // Use context window as the denominator for a single, provider-aligned
136    // pressure scale across backend and frontend.
137    let context_window = budget.max_context_tokens;
138    let estimated_usage = context_window_usage_percent(active_tokens, context_window);
139    let usage = session
140        .token_usage
141        .as_ref()
142        .and_then(|token_usage| {
143            let denominator = if token_usage.max_context_tokens > 0 {
144                token_usage.max_context_tokens
145            } else if token_usage.budget_limit > 0 {
146                // Legacy payload compatibility.
147                token_usage.budget_limit
148            } else {
149                context_window
150            };
151            (denominator > 0).then_some(context_window_usage_percent(
152                token_usage.total_tokens,
153                denominator,
154            ))
155        })
156        .map(|persisted_usage| persisted_usage.max(estimated_usage))
157        .unwrap_or(estimated_usage);
158
159    let rounded = usage.clamp(0.0, 100.0).round() as u8;
160    let trigger_tokens = budget.compression_trigger_context_tokens();
161    let trigger_percent = if budget.max_context_tokens > 0 {
162        (trigger_tokens as f64 / budget.max_context_tokens as f64) * 100.0
163    } else {
164        0.0
165    };
166    let threshold_reached = usage >= trigger_percent;
167
168    // Check non-system message count to stay consistent with the plan
169    // building requirement of >=3 non-system messages.  Using
170    // active_messages.len() would include system messages and expose the
171    // tool even when plan building would immediately fail.
172    let non_system_count = active_messages
173        .iter()
174        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
175        .count();
176
177    let should_expose_tool = threshold_reached && non_system_count >= 3;
178
179    ContextCompressionExposure {
180        budget,
181        active_tokens,
182        active_usage_percent: usage,
183        active_usage_percent_rounded: rounded,
184        should_expose_tool,
185    }
186}
187
188/// Build a compression plan that archives older active messages and replaces
189/// them with a caller-provided summary.
190pub fn build_compression_plan_with_summary(
191    session: &Session,
192    model_name: &str,
193    configured_budget: Option<&TokenBudget>,
194    summary_content: String,
195) -> Result<CompressionPlan, CompressionPlanError> {
196    build_compression_plan_with_summary_internal(
197        session,
198        model_name,
199        configured_budget,
200        summary_content,
201        true,
202        CompressionTriggerType::Auto,
203    )
204}
205
206/// Build a compression plan while bypassing "tool exposure" gating.
207///
208/// This is intended for host-enforced fallback paths when context pressure is
209/// critically high and compression must be attempted regardless of the normal
210/// trigger gate.
211pub fn build_forced_compression_plan_with_summary(
212    session: &Session,
213    model_name: &str,
214    configured_budget: Option<&TokenBudget>,
215    summary_content: String,
216    trigger_type: CompressionTriggerType,
217) -> Result<CompressionPlan, CompressionPlanError> {
218    build_compression_plan_with_summary_internal(
219        session,
220        model_name,
221        configured_budget,
222        summary_content,
223        false,
224        trigger_type,
225    )
226}
227
228fn build_compression_plan_with_summary_internal(
229    session: &Session,
230    model_name: &str,
231    configured_budget: Option<&TokenBudget>,
232    summary_content: String,
233    require_exposure_gate: bool,
234    trigger_type: CompressionTriggerType,
235) -> Result<CompressionPlan, CompressionPlanError> {
236    let exposure = estimate_context_compression_exposure(session, model_name, configured_budget);
237    if require_exposure_gate && !exposure.should_expose_tool {
238        return Err(CompressionPlanError::ExposureGateNotMet {
239            usage_percent: exposure.active_usage_percent,
240            trigger_percent: exposure.budget.compression_trigger_percent,
241        });
242    }
243
244    let budget = &exposure.budget;
245    let counter = TiktokenTokenCounter::default();
246    let summary_message = compression_summary_message(&summary_content);
247    let summary_tokens = counter.count_messages(&[summary_message]);
248
249    let context_window = budget.max_context_tokens;
250    let target_limit = budget.compression_target_context_tokens();
251
252    let mut active_messages = active_messages_for_budget(session);
253    if active_messages.is_empty() {
254        tracing::debug!("compression plan: no active messages, cannot build plan");
255        return Err(CompressionPlanError::NoActiveMessages);
256    }
257
258    let system_messages: Vec<Message> = active_messages
259        .iter()
260        .filter(|m| matches!(m.role, bamboo_agent_core::Role::System))
261        .cloned()
262        .collect();
263    let system_tokens = counter.count_messages(&system_messages);
264    let reserved_non_window_tokens = system_tokens.saturating_add(summary_tokens);
265    let window_limit = target_limit.saturating_sub(reserved_non_window_tokens);
266
267    let non_system: Vec<Message> = active_messages
268        .drain(..)
269        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
270        .collect();
271
272    if non_system.len() < 3 {
273        tracing::debug!(
274            "compression plan: not enough non-system messages ({}), need at least 3",
275            non_system.len()
276        );
277        return Err(CompressionPlanError::NotEnoughMessages {
278            non_system_count: non_system.len(),
279        });
280    }
281
282    let user_indexes = non_system
283        .iter()
284        .enumerate()
285        .filter_map(|(index, message)| {
286            matches!(message.role, bamboo_agent_core::Role::User).then_some(index)
287        })
288        .collect::<Vec<_>>();
289    let keep_user_count = user_indexes.len().min(3);
290    let anchor_index = if keep_user_count > 0 {
291        user_indexes[user_indexes.len() - keep_user_count]
292    } else {
293        non_system
294            .iter()
295            .rposition(|m| matches!(m.role, bamboo_agent_core::Role::User))
296            .unwrap_or(non_system.len().saturating_sub(1))
297    };
298    let protected_user_ids: HashSet<String> = if keep_user_count > 0 {
299        user_indexes[user_indexes.len() - keep_user_count..]
300            .iter()
301            .filter_map(|idx| non_system.get(*idx))
302            .map(|message| message.id.clone())
303            .collect()
304    } else {
305        HashSet::new()
306    };
307
308    tracing::debug!(
309        "compression plan: context_window={}, target_limit={}, system_tokens={}, summary_tokens={}, window_limit={}, non_system_messages={}, keep_user_count={}, keep_from_index={}",
310        context_window, target_limit, system_tokens, summary_tokens, window_limit, non_system.len(), keep_user_count, anchor_index
311    );
312
313    // Keep the newest 3 user turns (or fewer if there are not enough user
314    // turns) as active context and summarize older history before that
315    // boundary. If budget is still too high, continue moving the oldest
316    // non-protected messages into the summarize set.
317    let mut messages_to_summarize = non_system[..anchor_index].to_vec();
318
319    // Protected messages must never be summarized — move them to the keep set.
320    let mut never_compress_ids: Vec<String> = messages_to_summarize
321        .iter()
322        .filter(|m| m.never_compress || is_skill_tool_chain_message(m))
323        .map(|m| m.id.clone())
324        .collect();
325
326    // Also protect tool result messages that correspond to skill tool calls.
327    let skill_call_ids: Vec<String> = messages_to_summarize
328        .iter()
329        .filter(|m| is_skill_tool_chain_message(m))
330        .flat_map(|m| m.tool_calls.iter().flatten().map(|c| c.id.clone()))
331        .collect();
332    if !skill_call_ids.is_empty() {
333        for m in &*messages_to_summarize {
334            if let Some(ref call_id) = m.tool_call_id {
335                if skill_call_ids.contains(call_id) && !never_compress_ids.contains(&m.id) {
336                    never_compress_ids.push(m.id.clone());
337                }
338            }
339        }
340    }
341
342    if !never_compress_ids.is_empty() {
343        messages_to_summarize.retain(|m| !never_compress_ids.contains(&m.id));
344    }
345
346    let non_system_count = non_system.len();
347    let mut messages_to_keep = non_system[anchor_index..].to_vec();
348    // Add never_compress / skill messages to the keep set.
349    for id in &never_compress_ids {
350        if let Some(msg) = non_system.iter().find(|m| &m.id == id) {
351            if !messages_to_keep.iter().any(|m| m.id == *id) {
352                messages_to_keep.push(msg.clone());
353            }
354        }
355    }
356
357    while !messages_to_keep.is_empty() {
358        let keep_tokens = counter.count_messages(&messages_to_keep);
359        if keep_tokens <= window_limit {
360            break;
361        }
362
363        let Some(remove_index) = messages_to_keep.iter().position(|message| {
364            !protected_user_ids.contains(message.id.as_str())
365                && !never_compress_ids.contains(&message.id)
366        }) else {
367            // Remaining messages are all protected; stop shrinking.
368            break;
369        };
370        let moved = messages_to_keep.remove(remove_index);
371        messages_to_summarize.push(moved);
372    }
373
374    if messages_to_summarize.is_empty() {
375        tracing::debug!(
376            "compression plan: messages_to_summarize is empty after anchor/keep splitting"
377        );
378        return Err(CompressionPlanError::NothingToCompress {
379            anchor_index,
380            non_system_count,
381        });
382    }
383
384    let compressed_message_ids = messages_to_summarize
385        .iter()
386        .map(|message| message.id.clone())
387        .collect::<Vec<_>>();
388
389    let keep_tokens = counter.count_messages(&messages_to_keep);
390    let active_before = exposure.active_usage_percent;
391    // Use context_window as denominator, consistent with
392    // estimate_context_compression_exposure().
393    let active_after = if context_window == 0 {
394        0.0
395    } else {
396        let after_total = reserved_non_window_tokens.saturating_add(keep_tokens);
397        (after_total as f64 / context_window as f64) * 100.0
398    };
399
400    // Count actual segments being compressed using the same segmenter that
401    // prepare_hybrid_context uses, so the segment count is accurate.
402    let segmenter = crate::segmenter::MessageSegmenter::new();
403    let segments_removed = segmenter.segment(messages_to_summarize.clone()).len();
404
405    Ok(CompressionPlan {
406        compressed_message_ids,
407        messages_to_summarize,
408        summary_tokens,
409        summary_content,
410        active_usage_before_percent: active_before,
411        active_usage_after_percent: active_after,
412        trigger_percent: budget.compression_trigger_percent,
413        target_percent: budget.compression_target_percent,
414        segments_removed,
415        trigger_type,
416        compression_ratio: 0.0,
417        model_used: None,
418        latency_ms: 0,
419    })
420}
421
422/// Apply a previously computed compression plan to the session.
423/// Extract recently modified files from tool calls in the given messages.
424pub(super) fn extract_recently_modified_files(messages: &[Message]) -> Vec<(String, String)> {
425    let mut files = Vec::new();
426    for message in messages {
427        if let Some(ref tool_calls) = message.tool_calls {
428            for call in tool_calls {
429                let tool_name = call.function.name.as_str();
430                if !matches!(tool_name, "Write" | "Edit" | "Bash") {
431                    continue;
432                }
433                let args = &call.function.arguments;
434                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(args) {
435                    if let Some(path) = parsed.get("file_path").and_then(|v| v.as_str()) {
436                        files.push((path.to_string(), tool_name.to_string()));
437                    } else if let Some(cmd) = parsed.get("command").and_then(|v| v.as_str()) {
438                        // Extract file paths from shell commands heuristically
439                        for part in cmd.split_whitespace() {
440                            if part.contains('/')
441                                && (part.ends_with(".rs")
442                                    || part.ends_with(".ts")
443                                    || part.ends_with(".js")
444                                    || part.ends_with(".toml")
445                                    || part.ends_with(".json")
446                                    || part.ends_with(".md"))
447                            {
448                                files.push((part.to_string(), "Bash".to_string()));
449                            }
450                        }
451                    }
452                }
453            }
454        }
455    }
456    files.truncate(10);
457    files
458}
459
460/// Extract key decision snippets from assistant messages.
461pub(super) fn extract_key_decisions(messages: &[Message], limit: usize) -> Vec<String> {
462    let decision_keywords = [
463        "decided to",
464        "approach is",
465        "use ",
466        "using ",
467        "we'll go with",
468        "the plan is",
469        "strategy:",
470        "solution:",
471        "chose to",
472        "switched to",
473        "refactored to",
474        "migrated to",
475        "replaced with",
476    ];
477    let mut decisions = Vec::new();
478    for message in messages {
479        if !matches!(message.role, bamboo_agent_core::Role::Assistant) {
480            continue;
481        }
482        let content = &message.content;
483        for line in content.lines() {
484            let line_lower = line.to_lowercase();
485            if decision_keywords.iter().any(|kw| line_lower.contains(kw)) {
486                let truncated: String = line.chars().take(200).collect();
487                decisions.push(truncated);
488                if decisions.len() >= limit {
489                    return decisions;
490                }
491            }
492        }
493    }
494    decisions
495}
496
497/// Build a post-compaction recovery message that preserves critical context
498/// from the compressed messages so the LLM can continue work without losing
499/// track of active files, tasks, and decisions.
500fn build_post_compaction_recovery_message(
501    compressed_messages: &[Message],
502    session: &Session,
503) -> Option<Message> {
504    if compressed_messages.is_empty() {
505        return None;
506    }
507
508    let mut sections = Vec::new();
509
510    // 1. Recently modified files
511    let files = extract_recently_modified_files(compressed_messages);
512    if !files.is_empty() {
513        let mut section = String::from("## Recently Modified Files\n");
514        for (path, tool) in &files {
515            section.push_str(&format!("- {} ({})\n", path, tool));
516        }
517        sections.push(section);
518    }
519
520    // 2. Active tasks from task list
521    if let Some(ref task_list) = session.task_list {
522        let active_items: Vec<_> = task_list
523            .items
524            .iter()
525            .filter(|item| !matches!(item.status, bamboo_domain::TaskItemStatus::Completed))
526            .collect();
527        if !active_items.is_empty() {
528            let mut section = String::from("## Active Tasks\n");
529            for item in active_items.iter().take(10) {
530                section.push_str(&format!("- [{:?}] {}\n", item.status, item.description));
531            }
532            sections.push(section);
533        }
534    }
535
536    // 3. Key decisions
537    let decisions = extract_key_decisions(compressed_messages, 5);
538    if !decisions.is_empty() {
539        let mut section = String::from("## Key Decisions\n");
540        for decision in &decisions {
541            section.push_str(&format!("- {}\n", decision));
542        }
543        sections.push(section);
544    }
545
546    if sections.is_empty() {
547        return None;
548    }
549
550    let mut content = String::from("[post-compaction-recovery]\nContext extracted from compressed messages for continued work.\n\n");
551    content.push_str(&sections.join("\n"));
552
553    let mut message = Message::assistant(content, None);
554    message.never_compress = true;
555    Some(message)
556}
557
/// Coverage ratios describing how much extracted context a summary retains.
struct SummaryQualityMetrics {
    /// Fraction (`0.0..=1.0`) of extracted file paths that appear in the summary.
    file_coverage: f64,
    /// Fraction (`0.0..=1.0`) of extracted decision snippets that appear in the summary.
    decision_coverage: f64,
}
562
563fn validate_summary_quality(summary: &str, messages: &[Message]) -> SummaryQualityMetrics {
564    let files = extract_recently_modified_files(messages);
565    let decisions = extract_key_decisions(messages, 10);
566
567    let files_mentioned = files
568        .iter()
569        .filter(|(path, _)| summary.contains(path.as_str()))
570        .count();
571    let file_coverage = if files.is_empty() {
572        1.0
573    } else {
574        files_mentioned as f64 / files.len() as f64
575    };
576
577    let decisions_mentioned = decisions
578        .iter()
579        .filter(|d| {
580            let check_str: String = d.chars().take(50).collect();
581            summary.contains(&check_str)
582        })
583        .count();
584    let decision_coverage = if decisions.is_empty() {
585        1.0
586    } else {
587        decisions_mentioned as f64 / decisions.len() as f64
588    };
589
590    SummaryQualityMetrics {
591        file_coverage,
592        decision_coverage,
593    }
594}
595
596pub fn apply_compression_plan(session: &mut Session, plan: CompressionPlan) -> usize {
597    let compressed_ids: HashSet<&str> = plan
598        .compressed_message_ids
599        .iter()
600        .map(String::as_str)
601        .collect();
602
603    let mut changed_indexes = Vec::new();
604    for (index, message) in session.messages.iter_mut().enumerate() {
605        if message.compressed || !compressed_ids.contains(message.id.as_str()) {
606            continue;
607        }
608        message.compressed = true;
609        changed_indexes.push(index);
610    }
611
612    if changed_indexes.is_empty() {
613        return 0;
614    }
615
616    let event = CompressionEvent::new(
617        changed_indexes.len(),
618        plan.segments_removed,
619        plan.active_usage_before_percent,
620        plan.active_usage_after_percent,
621        plan.summary_tokens,
622        plan.trigger_type,
623        plan.compression_ratio,
624        plan.model_used.clone(),
625        plan.latency_ms,
626    );
627    let event_id = event.id.clone();
628    for index in changed_indexes {
629        session.messages[index].compressed_by_event_id = Some(event_id.clone());
630    }
631    session.compression_events.push(event);
632    session.conversation_summary = Some(ConversationSummary::new(
633        &plan.summary_content,
634        plan.compressed_message_ids.len(),
635        plan.summary_tokens,
636    ));
637
638    // Inject a post-compaction recovery message to preserve critical context
639    // from the compressed messages (files, tasks, decisions).
640    let compressed_messages: Vec<Message> = session
641        .messages
642        .iter()
643        .filter(|m| compressed_ids.contains(m.id.as_str()))
644        .cloned()
645        .collect();
646    if let Some(recovery) = build_post_compaction_recovery_message(&compressed_messages, session) {
647        // Insert just before the last user message, or at the end
648        let insert_pos = session
649            .messages
650            .iter()
651            .rposition(|m| matches!(m.role, bamboo_agent_core::Role::User) && !m.compressed)
652            .map(|pos| pos + 1)
653            .unwrap_or(session.messages.len());
654        session.messages.insert(insert_pos, recovery);
655    }
656
657    let quality = validate_summary_quality(&plan.summary_content, &compressed_messages);
658    if quality.file_coverage < 0.5 || quality.decision_coverage < 0.3 {
659        tracing::warn!(
660            "[{}] Summary quality: file_coverage={:.0}%, decision_coverage={:.0}%",
661            session.id,
662            quality.file_coverage * 100.0,
663            quality.decision_coverage * 100.0
664        );
665    }
666
667    // Instead of clearing token_usage entirely (which forces the next round
668    // to rely on heuristic estimates that don't account for tool schema
669    // tokens), recompute an approximate post-compression snapshot.  We
670    // preserve the context-window denominator from the previous usage snapshot
671    // so percentages stay consistent across rounds.
672    let counter = TiktokenTokenCounter::default();
673    let remaining_active: Vec<_> = session
674        .messages
675        .iter()
676        .filter(|m| !m.compressed)
677        .cloned()
678        .collect();
679    let system_msgs: Vec<_> = remaining_active
680        .iter()
681        .filter(|m| matches!(m.role, bamboo_agent_core::Role::System))
682        .cloned()
683        .collect();
684    let window_msgs: Vec<_> = remaining_active
685        .iter()
686        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
687        .cloned()
688        .collect();
689    let system_tokens = counter.count_messages(&system_msgs);
690    let new_summary_tokens = plan.summary_tokens;
691    let window_tokens = counter.count_messages(&window_msgs);
692    let total_tokens = system_tokens
693        .saturating_add(new_summary_tokens)
694        .saturating_add(window_tokens);
695    let previous_usage = session.token_usage.take();
696    let budget_limit = previous_usage
697        .as_ref()
698        .map(|u| {
699            if u.max_context_tokens > 0 {
700                u.max_context_tokens
701            } else {
702                u.budget_limit
703            }
704        })
705        .unwrap_or(0);
706    let max_context_tokens = previous_usage
707        .as_ref()
708        .map(|u| u.max_context_tokens)
709        .unwrap_or(0);
710    session.token_usage = Some(bamboo_agent_core::TokenBudgetUsage {
711        system_tokens,
712        summary_tokens: new_summary_tokens,
713        window_tokens,
714        total_tokens,
715        max_context_tokens,
716        budget_limit,
717        truncation_occurred: false,
718        segments_removed: 0,
719        prompt_cached_tool_outputs: 0,
720        prompt_cached_tool_tokens_saved: 0,
721        thinking_tokens: 0,
722        cache_read_input_tokens: 0,
723    });
724
725    session.updated_at = Utc::now();
726    plan.compressed_message_ids.len()
727}
728
729pub fn compression_summary_message(summary_content: &str) -> Message {
730    Message::system(format!(
731        "<!-- CONVERSATION_SUMMARY_START -->\n\
732         ## Previous Conversation Summary\n\
733         The following is compressed historical context for continuity only.\n\
734         It is background memory, not a new user request. Follow the current task list and recent messages over this summary when they conflict.\n\n\
735         {}\n\
736         <!-- CONVERSATION_SUMMARY_END -->",
737        summary_content
738    ))
739}
740
741pub fn active_messages_for_budget(session: &Session) -> Vec<Message> {
742    session
743        .messages
744        .iter()
745        .filter(|message| !message.compressed)
746        .cloned()
747        .collect()
748}
749
750pub fn summary_source_messages(session: &Session) -> Vec<Message> {
751    session
752        .messages
753        .iter()
754        .filter(|message| !message.compressed)
755        .filter(|message| !matches!(message.role, bamboo_agent_core::Role::System))
756        .cloned()
757        .collect()
758}
759
760pub fn build_summary_prompt(
761    session: &Session,
762    messages: &[Message],
763    existing_summary: Option<&str>,
764) -> String {
765    let mut content = String::new();
766    content.push_str(
767        "You are compressing conversation history for continued work. Produce a compact but reliable working-memory summary.\n\n",
768    );
769    content.push_str(
770        "Critical requirements:\n- First capture the in-flight work right before compression (what was being done, where, and with which tool/file)\n- Distinguish clearly between ACTIVE work, COMPLETED work, and OBSOLETE or superseded work\n- Do not restate old tasks as active unless they are still unresolved\n- The current task list is the source of truth for what is actively being worked on\n- Preserve constraints, decisions, file paths, code changes, errors, tool findings, blockers, and the next step\n- If earlier plans conflict with the current task list or newer messages, treat the earlier plans as obsolete or completed\n- Explicitly evaluate each clear user requirement (e.g. requirement 1, requirement 2) with a status and evidence\n- Return only summary text in the same language as the conversation\n\n",
771    );
772
773    if let Some(existing) = existing_summary.map(str::trim).filter(|s| !s.is_empty()) {
774        content.push_str("## Existing Summary\n");
775        content.push_str(existing);
776        content.push_str("\n\n");
777    }
778
779    let task_list_prompt = session.format_task_list_for_prompt();
780    if !task_list_prompt.trim().is_empty() {
781        content.push_str("## Current Task List\n");
782        content.push_str(task_list_prompt.trim());
783        content.push_str("\n\n");
784    }
785
786    content.push_str(
787        "## Required Output Sections\n1. Pre-compression in-flight work (what was being done immediately before compression)\n2. Current active objective\n3. Requirement checklist (Requirement | Status: completed/in_progress/pending/blocked/obsolete | Evidence)\n4. Active tasks\n5. Completed tasks\n6. Obsolete or superseded tasks\n7. Important context and constraints\n8. Files, code, and tool findings\n9. Open issues and next step\n\n",
788    );
789
790    content.push_str("## Messages To Compress\n\n");
791    for message in messages {
792        let role = match message.role {
793            bamboo_agent_core::Role::System => continue,
794            bamboo_agent_core::Role::User => "User",
795            bamboo_agent_core::Role::Assistant => match message.phase {
796                Some(MessagePhase::Commentary) => "Assistant Commentary",
797                Some(MessagePhase::FinalAnswer) => "Assistant Final",
798                None => "Assistant",
799            },
800            bamboo_agent_core::Role::Tool => "Tool Result",
801        };
802
803        content.push_str("### ");
804        content.push_str(role);
805        content.push('\n');
806        if let Some(tool_calls) = &message.tool_calls {
807            if !tool_calls.is_empty() {
808                let names = tool_calls
809                    .iter()
810                    .map(|call| call.function.name.as_str())
811                    .collect::<Vec<_>>()
812                    .join(", ");
813                content.push_str("Called tools: ");
814                content.push_str(&names);
815                content.push('\n');
816            }
817        }
818        if let Some(tool_call_id) = &message.tool_call_id {
819            content.push_str("Tool call id: ");
820            content.push_str(tool_call_id);
821            content.push('\n');
822        }
823        let snippet = truncate_chars(&message.content, 2000);
824        content.push_str(&snippet);
825        content.push_str("\n\n");
826    }
827
828    content.push_str(
829        "Return only the summary text. Be explicit about what is active now versus what is already done or no longer relevant.",
830    );
831    content
832}
833
/// Limits `value` to at most `max_chars` Unicode scalar values.
///
/// A string that already fits is returned unchanged. A longer string is cut
/// on a character boundary after `max_chars` characters and suffixed with
/// `"..."`; the suffix is not counted against `max_chars`.
fn truncate_chars(value: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character that does
    // not fit, so the scan stops there instead of counting the whole string.
    match value.char_indices().nth(max_chars) {
        None => value.to_string(),
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + 3);
            truncated.push_str(&value[..cut]);
            truncated.push_str("...");
            truncated
        }
    }
}
840
841#[cfg(test)]
842mod tests {
843    use super::*;
844    use bamboo_agent_core::TokenBudgetUsage;
845    use bamboo_domain::{FunctionCall, TaskItem, TaskItemStatus, TaskList, ToolCall};
846    use chrono::Utc;
847
848    fn make_budget() -> TokenBudget {
849        TokenBudget {
850            max_context_tokens: 1000,
851            max_output_tokens: 100,
852            strategy: BudgetStrategy::Hybrid {
853                window_size: 20,
854                enable_summarization: true,
855            },
856            safety_margin: 0,
857            compression_trigger_percent: 50,
858            compression_target_percent: 20,
859            working_reserve_tokens: 0,
860            fallback_trigger_percent: 75,
861            prompt_cache_min_tool_output_chars: 1_200,
862            prompt_cache_head_chars: 280,
863            prompt_cache_tail_chars: 180,
864            prompt_cache_recent_user_turns: 2,
865            prompt_cache_recent_tool_chains: 2,
866            max_tool_output_tokens: 0,
867        }
868    }
869
870    fn make_session_with_pressure() -> Session {
871        let mut session = Session::new("compression-hysteresis", "gpt-4o-mini");
872        session.token_budget = Some(make_budget());
873        session.add_message(Message::system("system"));
874        for i in 0..3 {
875            session.add_message(Message::user(format!(
876                "User message {i}: {}",
877                "alpha beta gamma delta epsilon ".repeat(2)
878            )));
879            session.add_message(Message::assistant(
880                format!(
881                    "Assistant message {i}: {}",
882                    "work log decisions next steps ".repeat(2)
883                ),
884                None,
885            ));
886        }
887        session
888    }
889
890    #[test]
891    fn context_window_usage_percent_uses_context_window_denominator() {
892        assert_eq!(context_window_usage_percent(0, 0), 0.0);
893        assert_eq!(context_window_usage_percent(500, 1000), 50.0);
894    }
895
896    #[test]
897    fn estimate_context_compression_exposure_crosses_trigger_when_usage_is_high_enough() {
898        let mut session = make_session_with_pressure();
899        if let Some(budget) = session.token_budget.as_mut() {
900            budget.compression_trigger_percent = 10;
901        }
902        let exposure = estimate_context_compression_exposure(
903            &session,
904            "gpt-4o-mini",
905            session.token_budget.as_ref(),
906        );
907        assert!(exposure.active_usage_percent >= 10.0);
908        assert!(exposure.should_expose_tool);
909    }
910
911    #[test]
912    fn estimate_context_compression_exposure_stays_below_trigger_when_usage_is_low() {
913        let mut session = make_session_with_pressure();
914        if let Some(budget) = session.token_budget.as_mut() {
915            budget.compression_trigger_percent = 99;
916        }
917
918        let exposure = estimate_context_compression_exposure(
919            &session,
920            "gpt-4o-mini",
921            session.token_budget.as_ref(),
922        );
923
924        assert!(exposure.active_usage_percent < 99.0);
925        assert!(!exposure.should_expose_tool);
926    }
927
928    #[test]
929    fn build_summary_prompt_includes_task_list_and_state_sections() {
930        let mut session = Session::new("summary-prompt", "gpt-4o-mini");
931        session.set_task_list(TaskList {
932            session_id: session.id.clone(),
933            title: "Task List".to_string(),
934            items: vec![
935                TaskItem {
936                    id: "task_1".to_string(),
937                    description: "检查 51% 又回落到 50% 的触发逻辑".to_string(),
938                    status: TaskItemStatus::InProgress,
939                    depends_on: Vec::new(),
940                    notes: "避免刚压缩完又立刻再次压缩".to_string(),
941                    ..TaskItem::default()
942                },
943                TaskItem {
944                    id: "task_2".to_string(),
945                    description: "重写 summarizer prompt 并纳入 task list".to_string(),
946                    status: TaskItemStatus::Pending,
947                    depends_on: Vec::new(),
948                    notes: String::new(),
949                    ..TaskItem::default()
950                },
951            ],
952            created_at: Utc::now(),
953            updated_at: Utc::now(),
954        });
955        let prompt = build_summary_prompt(
956            &session,
957            &[
958                Message::user("继续修复 context compression"),
959                Message::assistant("先分析 trigger / target / summary", None),
960            ],
961            Some("old summary"),
962        );
963
964        assert!(prompt.contains("## Current Task List"));
965        assert!(prompt.contains("Current active objective"));
966        assert!(prompt.contains("Requirement checklist"));
967        assert!(prompt.contains("Active tasks"));
968        assert!(prompt.contains("Completed tasks"));
969        assert!(prompt.contains("Obsolete or superseded tasks"));
970        assert!(prompt.contains("检查 51% 又回落到 50% 的触发逻辑"));
971        assert!(prompt.contains("old summary"));
972    }
973
974    #[test]
975    fn forced_plan_keeps_last_three_user_messages_active() {
976        let budget = TokenBudget {
977            max_context_tokens: 1200,
978            max_output_tokens: 100,
979            strategy: BudgetStrategy::Hybrid {
980                window_size: 20,
981                enable_summarization: true,
982            },
983            safety_margin: 0,
984            compression_trigger_percent: 80,
985            compression_target_percent: 20,
986            working_reserve_tokens: 0,
987            fallback_trigger_percent: 75,
988            prompt_cache_min_tool_output_chars: 1_200,
989            prompt_cache_head_chars: 280,
990            prompt_cache_tail_chars: 180,
991            prompt_cache_recent_user_turns: 2,
992            prompt_cache_recent_tool_chains: 2,
993            max_tool_output_tokens: 0,
994        };
995        let mut session = Session::new("keep-last-three-user-turns", "gpt-4o-mini");
996        session.token_budget = Some(budget.clone());
997        session.add_message(Message::system("system"));
998        for i in 0..6 {
999            session.add_message(Message::user(format!(
1000                "U{i}: {}",
1001                "alpha beta gamma ".repeat(8)
1002            )));
1003            session.add_message(Message::assistant(
1004                format!("A{i}: {}", "analysis plan steps ".repeat(8)),
1005                None,
1006            ));
1007        }
1008
1009        let plan = build_forced_compression_plan_with_summary(
1010            &session,
1011            "gpt-4o-mini",
1012            Some(&budget),
1013            "summary".to_string(),
1014            CompressionTriggerType::CriticalOverflow,
1015        )
1016        .expect("forced plan should build");
1017
1018        let compressed_ids = plan
1019            .compressed_message_ids
1020            .iter()
1021            .map(String::as_str)
1022            .collect::<HashSet<_>>();
1023        let kept_user_contents = session
1024            .messages
1025            .iter()
1026            .filter(|message| !matches!(message.role, bamboo_agent_core::Role::System))
1027            .filter(|message| !compressed_ids.contains(message.id.as_str()))
1028            .filter(|message| matches!(message.role, bamboo_agent_core::Role::User))
1029            .map(|message| message.content.clone())
1030            .collect::<Vec<_>>();
1031
1032        assert!(
1033            kept_user_contents.len() >= 3,
1034            "expected to keep at least 3 user messages, got {}",
1035            kept_user_contents.len()
1036        );
1037        assert!(kept_user_contents
1038            .iter()
1039            .any(|content| content.starts_with("U3:")));
1040        assert!(kept_user_contents
1041            .iter()
1042            .any(|content| content.starts_with("U4:")));
1043        assert!(kept_user_contents
1044            .iter()
1045            .any(|content| content.starts_with("U5:")));
1046    }
1047
1048    #[test]
1049    fn estimate_exposure_prefers_persisted_budget_usage_when_higher() {
1050        let mut session = Session::new("persisted-usage", "gpt-4o-mini");
1051        session.token_budget = Some(TokenBudget {
1052            max_context_tokens: 100_000,
1053            max_output_tokens: 1_000,
1054            strategy: BudgetStrategy::Hybrid {
1055                window_size: 20,
1056                enable_summarization: true,
1057            },
1058            safety_margin: 0,
1059            compression_trigger_percent: 80,
1060            compression_target_percent: 50,
1061            working_reserve_tokens: 0,
1062            fallback_trigger_percent: 75,
1063            prompt_cache_min_tool_output_chars: 1_200,
1064            prompt_cache_head_chars: 280,
1065            prompt_cache_tail_chars: 180,
1066            prompt_cache_recent_user_turns: 2,
1067            prompt_cache_recent_tool_chains: 2,
1068            max_tool_output_tokens: 0,
1069        });
1070        session.add_message(Message::system("system"));
1071        session.add_message(Message::user("short"));
1072        session.add_message(Message::assistant("short", None));
1073        session.add_message(Message::user("follow-up"));
1074        session.add_message(Message::assistant("reply", None));
1075        session.token_usage = Some(TokenBudgetUsage {
1076            system_tokens: 100,
1077            summary_tokens: 0,
1078            window_tokens: 95_900,
1079            total_tokens: 96_000,
1080            max_context_tokens: 100_000,
1081            budget_limit: 10_000,
1082            truncation_occurred: true,
1083            segments_removed: 12,
1084            prompt_cached_tool_outputs: 0,
1085            prompt_cached_tool_tokens_saved: 0,
1086            thinking_tokens: 0,
1087            cache_read_input_tokens: 0,
1088        });
1089
1090        let exposure = estimate_context_compression_exposure(
1091            &session,
1092            "gpt-4o-mini",
1093            session.token_budget.as_ref(),
1094        );
1095
1096        assert!(
1097            exposure.active_usage_percent >= 96.0,
1098            "expected persisted context-window usage to drive exposure, got {}",
1099            exposure.active_usage_percent
1100        );
1101        assert!(exposure.should_expose_tool);
1102    }
1103
1104    #[test]
1105    fn never_compress_messages_are_excluded_from_summarize_set() {
1106        let budget = TokenBudget {
1107            max_context_tokens: 1200,
1108            max_output_tokens: 100,
1109            strategy: BudgetStrategy::Hybrid {
1110                window_size: 20,
1111                enable_summarization: true,
1112            },
1113            safety_margin: 0,
1114            compression_trigger_percent: 80,
1115            compression_target_percent: 20,
1116            working_reserve_tokens: 0,
1117            fallback_trigger_percent: 75,
1118            prompt_cache_min_tool_output_chars: 1_200,
1119            prompt_cache_head_chars: 280,
1120            prompt_cache_tail_chars: 180,
1121            prompt_cache_recent_user_turns: 2,
1122            prompt_cache_recent_tool_chains: 2,
1123            max_tool_output_tokens: 0,
1124        };
1125        let mut session = Session::new("never-compress-test", "gpt-4o-mini");
1126        session.token_budget = Some(budget.clone());
1127        session.add_message(Message::system("system"));
1128
1129        // Old user message that should be summarized
1130        session.add_message(Message::user("Old question about X"));
1131        session.add_message(Message::assistant("Old answer about X", None));
1132
1133        // Protected user message (never_compress = true)
1134        let mut protected = Message::user("Critical context that must survive");
1135        protected.never_compress = true;
1136        session.add_message(protected);
1137        session.add_message(Message::assistant("Response to critical", None));
1138
1139        // Recent user messages that anchor the keep window
1140        for i in 0..4 {
1141            session.add_message(Message::user(format!(
1142                "Recent U{i}: {}",
1143                "padding text to fill budget ".repeat(6)
1144            )));
1145            session.add_message(Message::assistant(
1146                format!("Recent A{i}: {}", "reply padding text ".repeat(6)),
1147                None,
1148            ));
1149        }
1150
1151        let plan = build_forced_compression_plan_with_summary(
1152            &session,
1153            "gpt-4o-mini",
1154            Some(&budget),
1155            "summary".to_string(),
1156            CompressionTriggerType::Auto,
1157        )
1158        .expect("plan should build");
1159
1160        let compressed_ids: HashSet<&str> = plan
1161            .compressed_message_ids
1162            .iter()
1163            .map(String::as_str)
1164            .collect();
1165
1166        // Find the never_compress message
1167        let protected_msg = session
1168            .messages
1169            .iter()
1170            .find(|m| m.never_compress)
1171            .expect("should find the protected message");
1172
1173        assert!(
1174            !compressed_ids.contains(protected_msg.id.as_str()),
1175            "never_compress message should NOT be in the compressed set"
1176        );
1177    }
1178
1179    #[test]
1180    fn skill_tool_chain_messages_are_protected_from_compression() {
1181        let budget = TokenBudget {
1182            max_context_tokens: 1200,
1183            max_output_tokens: 100,
1184            strategy: BudgetStrategy::Hybrid {
1185                window_size: 20,
1186                enable_summarization: true,
1187            },
1188            safety_margin: 0,
1189            compression_trigger_percent: 80,
1190            compression_target_percent: 20,
1191            working_reserve_tokens: 0,
1192            fallback_trigger_percent: 75,
1193            prompt_cache_min_tool_output_chars: 1_200,
1194            prompt_cache_head_chars: 280,
1195            prompt_cache_tail_chars: 180,
1196            prompt_cache_recent_user_turns: 2,
1197            prompt_cache_recent_tool_chains: 2,
1198            max_tool_output_tokens: 0,
1199        };
1200        let mut session = Session::new("skill-chain-test", "gpt-4o-mini");
1201        session.token_budget = Some(budget.clone());
1202        session.add_message(Message::system("system"));
1203
1204        // Skill tool chain (load_skill + read_skill_resource)
1205        let mut skill_call = Message::assistant(String::new(), None);
1206        skill_call.tool_calls = Some(vec![ToolCall {
1207            id: "tc-skill".to_string(),
1208            tool_type: "function".to_string(),
1209            function: FunctionCall {
1210                name: "load_skill".to_string(),
1211                arguments: r#"{"skill_id":"my-skill"}"#.to_string(),
1212            },
1213        }]);
1214        session.add_message(skill_call);
1215
1216        let mut skill_result = Message::tool_result("tc-skill", "skill loaded");
1217        skill_result.tool_success = Some(true);
1218        session.add_message(skill_result);
1219
1220        // Regular messages to fill budget
1221        for i in 0..6 {
1222            session.add_message(Message::user(format!(
1223                "U{i}: {}",
1224                "alpha beta gamma delta ".repeat(8)
1225            )));
1226            session.add_message(Message::assistant(
1227                format!("A{i}: {}", "analysis steps plan ".repeat(8)),
1228                None,
1229            ));
1230        }
1231
1232        let plan = build_forced_compression_plan_with_summary(
1233            &session,
1234            "gpt-4o-mini",
1235            Some(&budget),
1236            "summary".to_string(),
1237            CompressionTriggerType::Auto,
1238        )
1239        .expect("plan should build");
1240
1241        let compressed_ids: HashSet<&str> = plan
1242            .compressed_message_ids
1243            .iter()
1244            .map(String::as_str)
1245            .collect();
1246
1247        // Skill tool chain messages should not be compressed
1248        let skill_messages: Vec<&Message> = session
1249            .messages
1250            .iter()
1251            .filter(|m| {
1252                m.tool_calls
1253                    .as_ref()
1254                    .is_some_and(|calls| calls.iter().any(|c| c.function.name == "load_skill"))
1255                    || m.tool_call_id.as_deref() == Some("tc-skill")
1256            })
1257            .collect();
1258
1259        for msg in &skill_messages {
1260            assert!(
1261                !compressed_ids.contains(msg.id.as_str()),
1262                "skill tool chain message {} should NOT be compressed",
1263                msg.id
1264            );
1265        }
1266    }
1267
1268    #[test]
1269    fn recovery_message_returns_none_for_empty_messages() {
1270        let session = Session::new("recovery-empty", "model");
1271        let result = build_post_compaction_recovery_message(&[], &session);
1272        assert!(result.is_none());
1273    }
1274
1275    #[test]
1276    fn recovery_message_has_never_compress_flag() {
1277        let mut session = Session::new("recovery-flag", "model");
1278        let messages = vec![Message::assistant("no decisions here", None)];
1279        session.set_task_list(TaskList {
1280            session_id: session.id.clone(),
1281            title: "Tasks".to_string(),
1282            items: vec![TaskItem {
1283                id: "t1".to_string(),
1284                description: "Active task".to_string(),
1285                status: TaskItemStatus::InProgress,
1286                ..TaskItem::default()
1287            }],
1288            created_at: Utc::now(),
1289            updated_at: Utc::now(),
1290        });
1291        let recovery = build_post_compaction_recovery_message(&messages, &session)
1292            .expect("should return recovery message");
1293        assert!(recovery.never_compress);
1294        assert!(recovery.content.contains("[post-compaction-recovery]"));
1295    }
1296
1297    #[test]
1298    fn recovery_message_extracts_file_paths_from_tool_calls() {
1299        let session = Session::new("recovery-files", "model");
1300        let mut write_call = Message::assistant("writing file", None);
1301        write_call.tool_calls = Some(vec![ToolCall {
1302            id: "tc1".to_string(),
1303            tool_type: "function".to_string(),
1304            function: FunctionCall {
1305                name: "Write".to_string(),
1306                arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#.to_string(),
1307            },
1308        }]);
1309        let mut edit_call = Message::assistant("editing file", None);
1310        edit_call.tool_calls = Some(vec![ToolCall {
1311            id: "tc2".to_string(),
1312            tool_type: "function".to_string(),
1313            function: FunctionCall {
1314                name: "Edit".to_string(),
1315                arguments: r#"{"file_path":"/lib/utils.rs","old":"x","new":"y"}"#.to_string(),
1316            },
1317        }]);
1318        let messages = vec![write_call, edit_call];
1319
1320        let recovery = build_post_compaction_recovery_message(&messages, &session)
1321            .expect("should return recovery");
1322        assert!(recovery.content.contains("/src/main.rs"));
1323        assert!(recovery.content.contains("/lib/utils.rs"));
1324        assert!(recovery.content.contains("Recently Modified Files"));
1325    }
1326
1327    #[test]
1328    fn recovery_message_includes_active_tasks() {
1329        let mut session = Session::new("recovery-tasks", "model");
1330        session.set_task_list(TaskList {
1331            session_id: session.id.clone(),
1332            title: "Tasks".to_string(),
1333            items: vec![
1334                TaskItem {
1335                    id: "t1".to_string(),
1336                    description: "Fix auth middleware".to_string(),
1337                    status: TaskItemStatus::InProgress,
1338                    ..TaskItem::default()
1339                },
1340                TaskItem {
1341                    id: "t2".to_string(),
1342                    description: "Add tests".to_string(),
1343                    status: TaskItemStatus::Pending,
1344                    ..TaskItem::default()
1345                },
1346                TaskItem {
1347                    id: "t3".to_string(),
1348                    description: "Done task".to_string(),
1349                    status: TaskItemStatus::Completed,
1350                    ..TaskItem::default()
1351                },
1352            ],
1353            created_at: Utc::now(),
1354            updated_at: Utc::now(),
1355        });
1356        let messages = vec![Message::assistant("some work", None)];
1357
1358        let recovery = build_post_compaction_recovery_message(&messages, &session)
1359            .expect("should return recovery");
1360        assert!(recovery.content.contains("Active Tasks"));
1361        assert!(recovery.content.contains("Fix auth middleware"));
1362        assert!(recovery.content.contains("Add tests"));
1363        // Completed tasks should NOT appear in active tasks
1364        assert!(!recovery.content.contains("Done task"));
1365    }
1366
1367    #[test]
1368    fn apply_compression_plan_injects_recovery_message() {
1369        let budget = TokenBudget {
1370            max_context_tokens: 1200,
1371            max_output_tokens: 100,
1372            strategy: BudgetStrategy::Hybrid {
1373                window_size: 20,
1374                enable_summarization: true,
1375            },
1376            safety_margin: 0,
1377            compression_trigger_percent: 80,
1378            compression_target_percent: 20,
1379            working_reserve_tokens: 0,
1380            fallback_trigger_percent: 75,
1381            prompt_cache_min_tool_output_chars: 1_200,
1382            prompt_cache_head_chars: 280,
1383            prompt_cache_tail_chars: 180,
1384            prompt_cache_recent_user_turns: 2,
1385            prompt_cache_recent_tool_chains: 2,
1386            max_tool_output_tokens: 0,
1387        };
1388        let mut session = Session::new("recovery-inject", "gpt-4o-mini");
1389        session.token_budget = Some(budget.clone());
1390        session.add_message(Message::system("system"));
1391
1392        // Old messages with tool calls containing file paths
1393        let mut write_msg = Message::assistant("writing", None);
1394        write_msg.tool_calls = Some(vec![ToolCall {
1395            id: "tc-w".to_string(),
1396            tool_type: "function".to_string(),
1397            function: FunctionCall {
1398                name: "Write".to_string(),
1399                arguments: r#"{"file_path":"/src/lib.rs","content":"pub fn hello() {}"}"#
1400                    .to_string(),
1401            },
1402        }]);
1403        session.add_message(Message::user("Write the file"));
1404        session.add_message(write_msg);
1405
1406        // Fill with enough messages to force compression
1407        for i in 0..6 {
1408            session.add_message(Message::user(format!(
1409                "U{i}: {}",
1410                "alpha beta gamma delta ".repeat(8)
1411            )));
1412            session.add_message(Message::assistant(
1413                format!("A{i}: {}", "analysis plan ".repeat(8)),
1414                None,
1415            ));
1416        }
1417
1418        let plan = build_forced_compression_plan_with_summary(
1419            &session,
1420            "gpt-4o-mini",
1421            Some(&budget),
1422            "summary text".to_string(),
1423            CompressionTriggerType::Auto,
1424        )
1425        .expect("plan should build");
1426
1427        assert!(!plan.compressed_message_ids.is_empty());
1428
1429        let compressed_count = apply_compression_plan(&mut session, plan);
1430        assert!(compressed_count > 0);
1431
1432        // Verify recovery message was injected
1433        let has_recovery = session.messages.iter().any(|m| {
1434            m.never_compress
1435                && m.content.contains("[post-compaction-recovery]")
1436                && m.content.contains("/src/lib.rs")
1437        });
1438        assert!(
1439            has_recovery,
1440            "session should contain a post-compaction recovery message with the file path"
1441        );
1442    }
1443
1444    #[test]
1445    fn summary_quality_full_coverage_when_all_files_mentioned() {
1446        let messages = vec![{
1447            let mut m = Message::assistant("writing", None);
1448            m.tool_calls = Some(vec![ToolCall {
1449                id: "tc1".to_string(),
1450                tool_type: "function".to_string(),
1451                function: FunctionCall {
1452                    name: "Write".to_string(),
1453                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1454                        .to_string(),
1455                },
1456            }]);
1457            m
1458        }];
1459        let summary = "Modified /src/main.rs to add main function";
1460        let quality = validate_summary_quality(summary, &messages);
1461        assert!(
1462            quality.file_coverage >= 0.99,
1463            "file_coverage should be ~1.0, got {:.2}",
1464            quality.file_coverage
1465        );
1466    }
1467
1468    #[test]
1469    fn summary_quality_zero_coverage_when_no_files_mentioned() {
1470        let messages = vec![{
1471            let mut m = Message::assistant("writing", None);
1472            m.tool_calls = Some(vec![ToolCall {
1473                id: "tc1".to_string(),
1474                tool_type: "function".to_string(),
1475                function: FunctionCall {
1476                    name: "Write".to_string(),
1477                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1478                        .to_string(),
1479                },
1480            }]);
1481            m
1482        }];
1483        let summary = "Summary that mentions nothing about files";
1484        let quality = validate_summary_quality(summary, &messages);
1485        assert!(
1486            quality.file_coverage < 0.01,
1487            "file_coverage should be ~0.0, got {:.2}",
1488            quality.file_coverage
1489        );
1490    }
1491
1492    #[test]
1493    fn summary_quality_handles_empty_messages() {
1494        let quality = validate_summary_quality("some summary", &[]);
1495        assert_eq!(quality.file_coverage, 1.0);
1496        assert_eq!(quality.decision_coverage, 1.0);
1497    }
1498}