Skip to main content

bamboo_compression/
compression_tooling.rs

1use crate::counter::{TiktokenTokenCounter, TokenCounter};
2use crate::limits::create_budget_for_model;
3use crate::{BudgetStrategy, TokenBudget};
4use bamboo_agent_core::MessagePhase;
5use bamboo_agent_core::{
6    CompressionEvent, CompressionTriggerType, ConversationSummary, Message, Session,
7};
8
9/// Checks if a message is part of a skill tool chain (load_skill / read_skill_resource).
10fn is_skill_tool_chain_message(message: &Message) -> bool {
11    message.tool_calls.as_ref().is_some_and(|calls| {
12        calls.iter().any(|call| {
13            matches!(
14                call.function.name.as_str(),
15                "load_skill" | "read_skill_resource"
16            )
17        })
18    })
19}
20use chrono::Utc;
21use std::collections::HashSet;
22
/// Structured reason why a compression plan could not be built.
///
/// Implements [`std::fmt::Display`] for human-readable messages and
/// [`std::error::Error`] so it can be boxed or chained like any other error.
#[derive(Debug, Clone)]
pub enum CompressionPlanError {
    /// The exposure gate (threshold not reached) prevented building.
    ExposureGateNotMet {
        /// Context usage, in percent, measured when the gate was evaluated.
        usage_percent: f64,
        /// Trigger percentage configured on the budget.
        trigger_percent: u8,
    },
    /// No active messages in the session.
    NoActiveMessages,
    /// Not enough non-system messages to compress (need >=3).
    NotEnoughMessages {
        /// Number of active non-system messages that were found.
        non_system_count: usize,
    },
    /// Nothing to compress after anchor/keep splitting.
    NothingToCompress {
        /// Index (within the non-system messages) where the keep set starts.
        anchor_index: usize,
        /// Number of active non-system messages that were considered.
        non_system_count: usize,
    },
}

impl std::fmt::Display for CompressionPlanError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExposureGateNotMet {
                usage_percent,
                trigger_percent,
            } => write!(
                f,
                "compression threshold not reached (usage={:.1}%, trigger={}%)",
                usage_percent, trigger_percent
            ),
            Self::NoActiveMessages => write!(f, "no active messages to compress"),
            Self::NotEnoughMessages { non_system_count } => write!(
                f,
                "not enough non-system messages to compress ({}, need >=3)",
                non_system_count
            ),
            Self::NothingToCompress {
                anchor_index,
                non_system_count,
            } => write!(
                f,
                "nothing to compress after anchor/keep splitting (anchor_index={}, non_system={})",
                anchor_index, non_system_count
            ),
        }
    }
}

// No variant wraps an underlying cause, so the default `source()` (None) is
// the correct behavior and the impl body stays empty.
impl std::error::Error for CompressionPlanError {}
70
/// Metadata about current context pressure, used to decide when compression
/// should be requested by host-side control flow.
///
/// Produced by `estimate_context_compression_exposure`.
#[derive(Debug, Clone)]
pub struct ContextCompressionExposure {
    /// Budget the estimate was computed against: the caller-configured budget
    /// when one was supplied, otherwise one created for the model name.
    pub budget: TokenBudget,
    /// Token count of the active (non-compressed) messages plus the wrapped
    /// conversation summary, if the session has one.
    pub active_tokens: u32,
    /// Context usage in percent: the larger of the local estimate and the
    /// figure derived from the session's persisted token usage. Not clamped.
    pub active_usage_percent: f64,
    /// `active_usage_percent` clamped to `0..=100` and rounded.
    pub active_usage_percent_rounded: u8,
    /// `true` when usage has reached the budget's trigger level and there are
    /// at least three active non-system messages.
    pub should_expose_tool: bool,
}
81
/// A compression plan describing which active historical messages should be
/// archived and summarized.
#[derive(Debug, Clone)]
pub struct CompressionPlan {
    /// Ids of the messages in `messages_to_summarize`, in the same order.
    pub compressed_message_ids: Vec<String>,
    /// Copies of the messages selected for archiving.
    pub messages_to_summarize: Vec<Message>,
    /// Token count of `summary_content` once wrapped by
    /// `compression_summary_message`.
    pub summary_tokens: u32,
    /// Caller-provided summary text that replaces the archived messages.
    pub summary_content: String,
    /// Context usage (percent) reported by the exposure estimate before
    /// compression.
    pub active_usage_before_percent: f64,
    /// Projected usage (percent of the context window) of system messages,
    /// summary, and kept messages; `0.0` when the context window is zero.
    pub active_usage_after_percent: f64,
    /// Trigger percentage copied from the budget.
    pub trigger_percent: u8,
    /// Target percentage copied from the budget.
    pub target_percent: u8,
    /// Number of segments the message segmenter produces for
    /// `messages_to_summarize`.
    pub segments_removed: usize,
    /// What initiated the compression; recorded on the resulting event.
    pub trigger_type: CompressionTriggerType,
    /// Forwarded to the compression event. The plan builder in this file
    /// initializes it to `0.0`; presumably filled in by the caller — confirm.
    pub compression_ratio: f64,
    /// Forwarded to the compression event. The plan builder in this file
    /// initializes it to `None`; presumably filled in by the caller — confirm.
    pub model_used: Option<String>,
    /// Forwarded to the compression event. The plan builder in this file
    /// initializes it to `0`; presumably filled in by the caller — confirm.
    pub latency_ms: u64,
}
100
/// Expresses `total_tokens` as a percentage of `context_window_tokens`.
///
/// A zero-sized context window yields `0.0` instead of dividing by zero. The
/// result is not clamped, so it exceeds `100.0` when the window is overfull.
pub fn context_window_usage_percent(total_tokens: u32, context_window_tokens: u32) -> f64 {
    match context_window_tokens {
        0 => 0.0,
        window => (f64::from(total_tokens) / f64::from(window)) * 100.0,
    }
}
107
/// Converts a configured trigger percentage into a usable `f64`.
///
/// Values in `1..=100` are returned unchanged; `0` and anything above `100`
/// fall back to `100.0`.
pub fn normalized_trigger_percent(trigger_percent: u8) -> f64 {
    if (1..=100).contains(&trigger_percent) {
        f64::from(trigger_percent)
    } else {
        100.0
    }
}
115
116/// Estimate whether context pressure has crossed the configured threshold for
117/// compression eligibility.
118pub fn estimate_context_compression_exposure(
119    session: &Session,
120    model_name: &str,
121    configured_budget: Option<&TokenBudget>,
122) -> ContextCompressionExposure {
123    let budget = configured_budget
124        .cloned()
125        .unwrap_or_else(|| create_budget_for_model(model_name, BudgetStrategy::default()));
126    let counter = TiktokenTokenCounter::default();
127    let active_messages = active_messages_for_budget(session);
128    let active_message_tokens = counter.count_messages(&active_messages);
129    let summary_tokens = session
130        .conversation_summary
131        .as_ref()
132        .map(|summary| counter.count_messages(&[compression_summary_message(&summary.content)]))
133        .unwrap_or(0);
134    let active_tokens = active_message_tokens.saturating_add(summary_tokens);
135    // Use context window as the denominator for a single, provider-aligned
136    // pressure scale across backend and frontend.
137    let context_window = budget.max_context_tokens;
138    let estimated_usage = context_window_usage_percent(active_tokens, context_window);
139    let usage = session
140        .token_usage
141        .as_ref()
142        .and_then(|token_usage| {
143            let denominator = if token_usage.max_context_tokens > 0 {
144                token_usage.max_context_tokens
145            } else if token_usage.budget_limit > 0 {
146                // Legacy payload compatibility.
147                token_usage.budget_limit
148            } else {
149                context_window
150            };
151            (denominator > 0).then_some(context_window_usage_percent(
152                token_usage.total_tokens,
153                denominator,
154            ))
155        })
156        .map(|persisted_usage| persisted_usage.max(estimated_usage))
157        .unwrap_or(estimated_usage);
158
159    let rounded = usage.clamp(0.0, 100.0).round() as u8;
160    let trigger_tokens = budget.compression_trigger_context_tokens();
161    let trigger_percent = if budget.max_context_tokens > 0 {
162        (trigger_tokens as f64 / budget.max_context_tokens as f64) * 100.0
163    } else {
164        0.0
165    };
166    let threshold_reached = usage >= trigger_percent;
167
168    // Check non-system message count to stay consistent with the plan
169    // building requirement of >=3 non-system messages.  Using
170    // active_messages.len() would include system messages and expose the
171    // tool even when plan building would immediately fail.
172    let non_system_count = active_messages
173        .iter()
174        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
175        .count();
176
177    let should_expose_tool = threshold_reached && non_system_count >= 3;
178
179    ContextCompressionExposure {
180        budget,
181        active_tokens,
182        active_usage_percent: usage,
183        active_usage_percent_rounded: rounded,
184        should_expose_tool,
185    }
186}
187
188/// Build a compression plan that archives older active messages and replaces
189/// them with a caller-provided summary.
190pub fn build_compression_plan_with_summary(
191    session: &Session,
192    model_name: &str,
193    configured_budget: Option<&TokenBudget>,
194    summary_content: String,
195) -> Result<CompressionPlan, CompressionPlanError> {
196    build_compression_plan_with_summary_internal(
197        session,
198        model_name,
199        configured_budget,
200        summary_content,
201        true,
202        CompressionTriggerType::Auto,
203    )
204}
205
206/// Build a compression plan while bypassing "tool exposure" gating.
207///
208/// This is intended for host-enforced fallback paths when context pressure is
209/// critically high and compression must be attempted regardless of the normal
210/// trigger gate.
211pub fn build_forced_compression_plan_with_summary(
212    session: &Session,
213    model_name: &str,
214    configured_budget: Option<&TokenBudget>,
215    summary_content: String,
216    trigger_type: CompressionTriggerType,
217) -> Result<CompressionPlan, CompressionPlanError> {
218    build_compression_plan_with_summary_internal(
219        session,
220        model_name,
221        configured_budget,
222        summary_content,
223        false,
224        trigger_type,
225    )
226}
227
/// Shared implementation behind the gated and the forced plan builders.
///
/// Splits the session's active (non-compressed), non-system messages into a
/// set to summarize and a set to keep, and reports the projected context
/// usage after compression. The session itself is not modified.
///
/// # Parameters
/// - `session`: source of the active messages.
/// - `model_name`, `configured_budget`: forwarded to
///   `estimate_context_compression_exposure` to resolve the budget.
/// - `summary_content`: caller-provided summary; only its token count is used
///   here, the text is passed through to the plan unchanged.
/// - `require_exposure_gate`: when `true`, fail unless the exposure estimate
///   says compression should be offered.
/// - `trigger_type`: recorded on the plan unchanged.
///
/// # Errors
/// - `ExposureGateNotMet` when the gate is required and not satisfied.
/// - `NoActiveMessages` when the session has no active messages.
/// - `NotEnoughMessages` when fewer than three active non-system messages
///   exist.
/// - `NothingToCompress` when the split leaves nothing to summarize.
fn build_compression_plan_with_summary_internal(
    session: &Session,
    model_name: &str,
    configured_budget: Option<&TokenBudget>,
    summary_content: String,
    require_exposure_gate: bool,
    trigger_type: CompressionTriggerType,
) -> Result<CompressionPlan, CompressionPlanError> {
    let exposure = estimate_context_compression_exposure(session, model_name, configured_budget);
    // NOTE(review): `should_expose_tool` is also false when fewer than three
    // non-system messages exist, yet this error always reports the threshold
    // as the cause — confirm whether that is intended.
    if require_exposure_gate && !exposure.should_expose_tool {
        return Err(CompressionPlanError::ExposureGateNotMet {
            usage_percent: exposure.active_usage_percent,
            trigger_percent: exposure.budget.compression_trigger_percent,
        });
    }

    let budget = &exposure.budget;
    let counter = TiktokenTokenCounter::default();
    // Count the summary in the wrapped form produced by
    // `compression_summary_message`, not as raw text.
    let summary_message = compression_summary_message(&summary_content);
    let summary_tokens = counter.count_messages(&[summary_message]);

    let context_window = budget.max_context_tokens;
    let target_limit = budget.compression_target_context_tokens();

    let mut active_messages = active_messages_for_budget(session);
    if active_messages.is_empty() {
        tracing::debug!("compression plan: no active messages, cannot build plan");
        return Err(CompressionPlanError::NoActiveMessages);
    }

    // System messages and the summary are never candidates for summarizing;
    // their tokens are reserved and subtracted from the target to obtain the
    // budget available to the kept conversation window.
    let system_messages: Vec<Message> = active_messages
        .iter()
        .filter(|m| matches!(m.role, bamboo_agent_core::Role::System))
        .cloned()
        .collect();
    let system_tokens = counter.count_messages(&system_messages);
    let reserved_non_window_tokens = system_tokens.saturating_add(summary_tokens);
    let window_limit = target_limit.saturating_sub(reserved_non_window_tokens);

    // Everything else, in original order, is what gets split below.
    let non_system: Vec<Message> = active_messages
        .drain(..)
        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
        .collect();

    if non_system.len() < 3 {
        tracing::debug!(
            "compression plan: not enough non-system messages ({}), need at least 3",
            non_system.len()
        );
        return Err(CompressionPlanError::NotEnoughMessages {
            non_system_count: non_system.len(),
        });
    }

    // Positions (within `non_system`) of every user message.
    let user_indexes = non_system
        .iter()
        .enumerate()
        .filter_map(|(index, message)| {
            matches!(message.role, bamboo_agent_core::Role::User).then_some(index)
        })
        .collect::<Vec<_>>();
    let keep_user_count = user_indexes.len().min(3);
    // The anchor is the oldest of the newest `keep_user_count` user messages;
    // everything from the anchor onwards starts out in the keep set.
    let anchor_index = if keep_user_count > 0 {
        user_indexes[user_indexes.len() - keep_user_count]
    } else {
        // NOTE(review): this branch only runs when there are no user
        // messages, so the `rposition` search cannot match and the anchor is
        // always the last index here.
        non_system
            .iter()
            .rposition(|m| matches!(m.role, bamboo_agent_core::Role::User))
            .unwrap_or(non_system.len().saturating_sub(1))
    };
    // Ids of the newest user messages; the shrink loop below never moves
    // these into the summarize set.
    let protected_user_ids: HashSet<String> = if keep_user_count > 0 {
        user_indexes[user_indexes.len() - keep_user_count..]
            .iter()
            .filter_map(|idx| non_system.get(*idx))
            .map(|message| message.id.clone())
            .collect()
    } else {
        HashSet::new()
    };

    tracing::debug!(
        "compression plan: context_window={}, target_limit={}, system_tokens={}, summary_tokens={}, window_limit={}, non_system_messages={}, keep_user_count={}, keep_from_index={}",
        context_window, target_limit, system_tokens, summary_tokens, window_limit, non_system.len(), keep_user_count, anchor_index
    );

    // Keep the newest 3 user turns (or fewer if there are not enough user
    // turns) as active context and summarize older history before that
    // boundary. If budget is still too high, continue moving the oldest
    // non-protected messages into the summarize set.
    let mut messages_to_summarize = non_system[..anchor_index].to_vec();

    // Protected messages must never be summarized — move them to the keep set.
    // This covers messages flagged `never_compress` and messages that issue
    // skill tool calls.
    let mut never_compress_ids: Vec<String> = messages_to_summarize
        .iter()
        .filter(|m| m.never_compress || is_skill_tool_chain_message(m))
        .map(|m| m.id.clone())
        .collect();

    // Also protect tool result messages that correspond to skill tool calls.
    // Every call id of a skill-chain message is collected, including calls to
    // non-skill tools made by that same message.
    let skill_call_ids: Vec<String> = messages_to_summarize
        .iter()
        .filter(|m| is_skill_tool_chain_message(m))
        .flat_map(|m| m.tool_calls.iter().flatten().map(|c| c.id.clone()))
        .collect();
    if !skill_call_ids.is_empty() {
        for m in &*messages_to_summarize {
            if let Some(ref call_id) = m.tool_call_id {
                if skill_call_ids.contains(call_id) && !never_compress_ids.contains(&m.id) {
                    never_compress_ids.push(m.id.clone());
                }
            }
        }
    }

    if !never_compress_ids.is_empty() {
        messages_to_summarize.retain(|m| !never_compress_ids.contains(&m.id));
    }

    let non_system_count = non_system.len();
    let mut messages_to_keep = non_system[anchor_index..].to_vec();
    // Add never_compress / skill messages to the keep set.
    // They are appended after the anchored tail, so `messages_to_keep` is not
    // strictly chronological; it is only used for token counting and for
    // picking the next message to move.
    for id in &never_compress_ids {
        if let Some(msg) = non_system.iter().find(|m| &m.id == id) {
            if !messages_to_keep.iter().any(|m| m.id == *id) {
                messages_to_keep.push(msg.clone());
            }
        }
    }

    // Shrink the keep set until it fits `window_limit`: each round moves the
    // first message that is neither a protected user turn nor never-compress
    // into the summarize set. Token usage is recounted every round.
    while !messages_to_keep.is_empty() {
        let keep_tokens = counter.count_messages(&messages_to_keep);
        if keep_tokens <= window_limit {
            break;
        }

        let Some(remove_index) = messages_to_keep.iter().position(|message| {
            !protected_user_ids.contains(message.id.as_str())
                && !never_compress_ids.contains(&message.id)
        }) else {
            // Remaining messages are all protected; stop shrinking.
            break;
        };
        let moved = messages_to_keep.remove(remove_index);
        messages_to_summarize.push(moved);
    }

    if messages_to_summarize.is_empty() {
        tracing::debug!(
            "compression plan: messages_to_summarize is empty after anchor/keep splitting"
        );
        return Err(CompressionPlanError::NothingToCompress {
            anchor_index,
            non_system_count,
        });
    }

    let compressed_message_ids = messages_to_summarize
        .iter()
        .map(|message| message.id.clone())
        .collect::<Vec<_>>();

    let keep_tokens = counter.count_messages(&messages_to_keep);
    let active_before = exposure.active_usage_percent;
    // Use context_window as denominator, consistent with
    // estimate_context_compression_exposure().
    let active_after = if context_window == 0 {
        0.0
    } else {
        let after_total = reserved_non_window_tokens.saturating_add(keep_tokens);
        (after_total as f64 / context_window as f64) * 100.0
    };

    // Count actual segments being compressed using the same segmenter that
    // prepare_hybrid_context uses, so the segment count is accurate.
    let segmenter = crate::segmenter::MessageSegmenter::new();
    let segments_removed = segmenter.segment(messages_to_summarize.clone()).len();

    // Ratio, model, and latency are not known at plan-building time and are
    // initialized to neutral values.
    Ok(CompressionPlan {
        compressed_message_ids,
        messages_to_summarize,
        summary_tokens,
        summary_content,
        active_usage_before_percent: active_before,
        active_usage_after_percent: active_after,
        trigger_percent: budget.compression_trigger_percent,
        target_percent: budget.compression_target_percent,
        segments_removed,
        trigger_type,
        compression_ratio: 0.0,
        model_used: None,
        latency_ms: 0,
    })
}
421
422/// Apply a previously computed compression plan to the session.
423/// Extract recently modified files from tool calls in the given messages.
424pub(super) fn extract_recently_modified_files(messages: &[Message]) -> Vec<(String, String)> {
425    let mut files = Vec::new();
426    for message in messages {
427        if let Some(ref tool_calls) = message.tool_calls {
428            for call in tool_calls {
429                let tool_name = call.function.name.as_str();
430                if !matches!(tool_name, "Write" | "Edit" | "Bash") {
431                    continue;
432                }
433                let args = &call.function.arguments;
434                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(args) {
435                    if let Some(path) = parsed.get("file_path").and_then(|v| v.as_str()) {
436                        files.push((path.to_string(), tool_name.to_string()));
437                    } else if let Some(cmd) = parsed.get("command").and_then(|v| v.as_str()) {
438                        // Extract file paths from shell commands heuristically
439                        for part in cmd.split_whitespace() {
440                            if part.contains('/')
441                                && (part.ends_with(".rs")
442                                    || part.ends_with(".ts")
443                                    || part.ends_with(".js")
444                                    || part.ends_with(".toml")
445                                    || part.ends_with(".json")
446                                    || part.ends_with(".md"))
447                            {
448                                files.push((part.to_string(), "Bash".to_string()));
449                            }
450                        }
451                    }
452                }
453            }
454        }
455    }
456    files.truncate(10);
457    files
458}
459
460/// Extract key decision snippets from assistant messages.
461pub(super) fn extract_key_decisions(messages: &[Message], limit: usize) -> Vec<String> {
462    let decision_keywords = [
463        "decided to",
464        "approach is",
465        "use ",
466        "using ",
467        "we'll go with",
468        "the plan is",
469        "strategy:",
470        "solution:",
471        "chose to",
472        "switched to",
473        "refactored to",
474        "migrated to",
475        "replaced with",
476    ];
477    let mut decisions = Vec::new();
478    for message in messages {
479        if !matches!(message.role, bamboo_agent_core::Role::Assistant) {
480            continue;
481        }
482        let content = &message.content;
483        for line in content.lines() {
484            let line_lower = line.to_lowercase();
485            if decision_keywords.iter().any(|kw| line_lower.contains(kw)) {
486                let truncated: String = line.chars().take(200).collect();
487                decisions.push(truncated);
488                if decisions.len() >= limit {
489                    return decisions;
490                }
491            }
492        }
493    }
494    decisions
495}
496
497/// Build a post-compaction recovery message that preserves critical context
498/// from the compressed messages so the LLM can continue work without losing
499/// track of active files, tasks, and decisions.
500fn build_post_compaction_recovery_message(
501    compressed_messages: &[Message],
502    session: &Session,
503) -> Option<Message> {
504    if compressed_messages.is_empty() {
505        return None;
506    }
507
508    let mut sections = Vec::new();
509
510    // 1. Recently modified files
511    let files = extract_recently_modified_files(compressed_messages);
512    if !files.is_empty() {
513        let mut section = String::from("## Recently Modified Files\n");
514        for (path, tool) in &files {
515            section.push_str(&format!("- {} ({})\n", path, tool));
516        }
517        sections.push(section);
518    }
519
520    // 2. Active tasks from task list
521    if let Some(ref task_list) = session.task_list {
522        let active_items: Vec<_> = task_list
523            .items
524            .iter()
525            .filter(|item| !matches!(item.status, bamboo_domain::TaskItemStatus::Completed))
526            .collect();
527        if !active_items.is_empty() {
528            let mut section = String::from("## Active Tasks\n");
529            for item in active_items.iter().take(10) {
530                section.push_str(&format!("- [{:?}] {}\n", item.status, item.description));
531            }
532            sections.push(section);
533        }
534    }
535
536    // 3. Key decisions
537    let decisions = extract_key_decisions(compressed_messages, 5);
538    if !decisions.is_empty() {
539        let mut section = String::from("## Key Decisions\n");
540        for decision in &decisions {
541            section.push_str(&format!("- {}\n", decision));
542        }
543        sections.push(section);
544    }
545
546    if sections.is_empty() {
547        return None;
548    }
549
550    let mut content = String::from("[post-compaction-recovery]\nContext extracted from compressed messages for continued work.\n\n");
551    content.push_str(&sections.join("\n"));
552
553    let mut message = Message::assistant(content, None);
554    message.never_compress = true;
555    Some(message)
556}
557
/// Coverage ratios describing how much of the compressed history a summary
/// mentions. Each value is a fraction between `0.0` and `1.0`.
struct SummaryQualityMetrics {
    /// Fraction of extracted file paths that appear verbatim in the summary;
    /// `1.0` when no file paths were extracted.
    file_coverage: f64,
    /// Fraction of extracted decision snippets whose first 50 characters
    /// appear verbatim in the summary; `1.0` when no decisions were extracted.
    decision_coverage: f64,
}
562
563fn validate_summary_quality(summary: &str, messages: &[Message]) -> SummaryQualityMetrics {
564    let files = extract_recently_modified_files(messages);
565    let decisions = extract_key_decisions(messages, 10);
566
567    let files_mentioned = files
568        .iter()
569        .filter(|(path, _)| summary.contains(path.as_str()))
570        .count();
571    let file_coverage = if files.is_empty() {
572        1.0
573    } else {
574        files_mentioned as f64 / files.len() as f64
575    };
576
577    let decisions_mentioned = decisions
578        .iter()
579        .filter(|d| {
580            let check_str: String = d.chars().take(50).collect();
581            summary.contains(&check_str)
582        })
583        .count();
584    let decision_coverage = if decisions.is_empty() {
585        1.0
586    } else {
587        decisions_mentioned as f64 / decisions.len() as f64
588    };
589
590    SummaryQualityMetrics {
591        file_coverage,
592        decision_coverage,
593    }
594}
595
596pub fn apply_compression_plan(session: &mut Session, plan: CompressionPlan) -> usize {
597    let compressed_ids: HashSet<&str> = plan
598        .compressed_message_ids
599        .iter()
600        .map(String::as_str)
601        .collect();
602
603    let mut changed_indexes = Vec::new();
604    for (index, message) in session.messages.iter_mut().enumerate() {
605        if message.compressed || !compressed_ids.contains(message.id.as_str()) {
606            continue;
607        }
608        message.compressed = true;
609        changed_indexes.push(index);
610    }
611
612    if changed_indexes.is_empty() {
613        return 0;
614    }
615
616    let event = CompressionEvent::new(
617        changed_indexes.len(),
618        plan.segments_removed,
619        plan.active_usage_before_percent,
620        plan.active_usage_after_percent,
621        plan.summary_tokens,
622        plan.trigger_type,
623        plan.compression_ratio,
624        plan.model_used.clone(),
625        plan.latency_ms,
626    );
627    let event_id = event.id.clone();
628    for index in changed_indexes {
629        session.messages[index].compressed_by_event_id = Some(event_id.clone());
630    }
631    session.compression_events.push(event);
632    session.conversation_summary = Some(ConversationSummary::new(
633        &plan.summary_content,
634        plan.compressed_message_ids.len(),
635        plan.summary_tokens,
636    ));
637
638    // Inject a post-compaction recovery message to preserve critical context
639    // from the compressed messages (files, tasks, decisions).
640    let compressed_messages: Vec<Message> = session
641        .messages
642        .iter()
643        .filter(|m| compressed_ids.contains(m.id.as_str()))
644        .cloned()
645        .collect();
646    if let Some(recovery) = build_post_compaction_recovery_message(&compressed_messages, session) {
647        // Insert just before the last user message, or at the end
648        let insert_pos = session
649            .messages
650            .iter()
651            .rposition(|m| matches!(m.role, bamboo_agent_core::Role::User) && !m.compressed)
652            .map(|pos| pos + 1)
653            .unwrap_or(session.messages.len());
654        session.messages.insert(insert_pos, recovery);
655    }
656
657    let quality = validate_summary_quality(&plan.summary_content, &compressed_messages);
658    if quality.file_coverage < 0.5 || quality.decision_coverage < 0.3 {
659        tracing::warn!(
660            "[{}] Summary quality: file_coverage={:.0}%, decision_coverage={:.0}%",
661            session.id,
662            quality.file_coverage * 100.0,
663            quality.decision_coverage * 100.0
664        );
665    }
666
667    // Instead of clearing token_usage entirely (which forces the next round
668    // to rely on heuristic estimates that don't account for tool schema
669    // tokens), recompute an approximate post-compression snapshot.  We
670    // preserve the context-window denominator from the previous usage snapshot
671    // so percentages stay consistent across rounds.
672    let counter = TiktokenTokenCounter::default();
673    let remaining_active: Vec<_> = session
674        .messages
675        .iter()
676        .filter(|m| !m.compressed)
677        .cloned()
678        .collect();
679    let system_msgs: Vec<_> = remaining_active
680        .iter()
681        .filter(|m| matches!(m.role, bamboo_agent_core::Role::System))
682        .cloned()
683        .collect();
684    let window_msgs: Vec<_> = remaining_active
685        .iter()
686        .filter(|m| !matches!(m.role, bamboo_agent_core::Role::System))
687        .cloned()
688        .collect();
689    let system_tokens = counter.count_messages(&system_msgs);
690    let new_summary_tokens = plan.summary_tokens;
691    let window_tokens = counter.count_messages(&window_msgs);
692    let total_tokens = system_tokens
693        .saturating_add(new_summary_tokens)
694        .saturating_add(window_tokens);
695    let previous_usage = session.token_usage.take();
696    let budget_limit = previous_usage
697        .as_ref()
698        .map(|u| {
699            if u.max_context_tokens > 0 {
700                u.max_context_tokens
701            } else {
702                u.budget_limit
703            }
704        })
705        .unwrap_or(0);
706    let max_context_tokens = previous_usage
707        .as_ref()
708        .map(|u| u.max_context_tokens)
709        .unwrap_or(0);
710    session.token_usage = Some(bamboo_agent_core::TokenBudgetUsage {
711        system_tokens,
712        summary_tokens: new_summary_tokens,
713        window_tokens,
714        total_tokens,
715        max_context_tokens,
716        budget_limit,
717        truncation_occurred: false,
718        segments_removed: 0,
719        prompt_cached_tool_outputs: 0,
720        prompt_cached_tool_tokens_saved: 0,
721        thinking_tokens: 0,
722        cache_read_input_tokens: 0,
723    });
724
725    session.updated_at = Utc::now();
726    plan.compressed_message_ids.len()
727}
728
729pub fn compression_summary_message(summary_content: &str) -> Message {
730    Message::system(format!(
731        "<!-- CONVERSATION_SUMMARY_START -->\n\
732         ## Previous Conversation Summary\n\
733         The following is compressed historical context for continuity only.\n\
734         It is background memory, not a new user request. Follow the current task list and recent messages over this summary when they conflict.\n\n\
735         {}\n\
736         <!-- CONVERSATION_SUMMARY_END -->",
737        summary_content
738    ))
739}
740
741pub fn active_messages_for_budget(session: &Session) -> Vec<Message> {
742    session
743        .messages
744        .iter()
745        .filter(|message| !message.compressed)
746        .cloned()
747        .collect()
748}
749
750pub fn summary_source_messages(session: &Session) -> Vec<Message> {
751    session
752        .messages
753        .iter()
754        .filter(|message| !message.compressed)
755        .filter(|message| !matches!(message.role, bamboo_agent_core::Role::System))
756        .cloned()
757        .collect()
758}
759
760pub fn build_summary_prompt(
761    session: &Session,
762    messages: &[Message],
763    existing_summary: Option<&str>,
764) -> String {
765    let mut content = String::new();
766    content.push_str(
767        "You are compressing conversation history for continued work. Produce a compact but reliable working-memory summary.\n\n",
768    );
769    content.push_str(
770        "Critical requirements:\n- First capture the in-flight work right before compression (what was being done, where, and with which tool/file)\n- Distinguish clearly between ACTIVE work, COMPLETED work, and OBSOLETE or superseded work\n- Do not restate old tasks as active unless they are still unresolved\n- The current task list is the source of truth for what is actively being worked on\n- Preserve constraints, decisions, file paths, code changes, errors, tool findings, blockers, and the next step\n- If earlier plans conflict with the current task list or newer messages, treat the earlier plans as obsolete or completed\n- Explicitly evaluate each clear user requirement (e.g. requirement 1, requirement 2) with a status and evidence\n- Return only summary text in the same language as the conversation\n\n",
771    );
772
773    if let Some(existing) = existing_summary.map(str::trim).filter(|s| !s.is_empty()) {
774        content.push_str("## Existing Summary\n");
775        content.push_str(existing);
776        content.push_str("\n\n");
777    }
778
779    let task_list_prompt = session.format_task_list_for_prompt();
780    if !task_list_prompt.trim().is_empty() {
781        content.push_str("## Current Task List\n");
782        content.push_str(task_list_prompt.trim());
783        content.push_str("\n\n");
784    }
785
786    content.push_str(
787        "## Required Output Sections\n1. Pre-compression in-flight work (what was being done immediately before compression)\n2. Current active objective\n3. Requirement checklist (Requirement | Status: completed/in_progress/pending/blocked/obsolete | Evidence)\n4. Active tasks\n5. Completed tasks\n6. Obsolete or superseded tasks\n7. Important context and constraints\n8. Files, code, and tool findings\n9. Open issues and next step\n\n",
788    );
789
790    content.push_str("## Messages To Compress\n\n");
791    for message in messages {
792        let role = match message.role {
793            bamboo_agent_core::Role::System => continue,
794            bamboo_agent_core::Role::User => "User",
795            bamboo_agent_core::Role::Assistant => match message.phase {
796                Some(MessagePhase::Commentary) => "Assistant Commentary",
797                Some(MessagePhase::FinalAnswer) => "Assistant Final",
798                None => "Assistant",
799            },
800            bamboo_agent_core::Role::Tool => "Tool Result",
801        };
802
803        content.push_str("### ");
804        content.push_str(role);
805        content.push('\n');
806        if let Some(tool_calls) = &message.tool_calls {
807            if !tool_calls.is_empty() {
808                let names = tool_calls
809                    .iter()
810                    .map(|call| call.function.name.as_str())
811                    .collect::<Vec<_>>()
812                    .join(", ");
813                content.push_str("Called tools: ");
814                content.push_str(&names);
815                content.push('\n');
816            }
817        }
818        if let Some(tool_call_id) = &message.tool_call_id {
819            content.push_str("Tool call id: ");
820            content.push_str(tool_call_id);
821            content.push('\n');
822        }
823        let snippet = truncate_chars(&message.content, 2000);
824        content.push_str(&snippet);
825        content.push_str("\n\n");
826    }
827
828    content.push_str(
829        "Return only the summary text. Be explicit about what is active now versus what is already done or no longer relevant.",
830    );
831    content
832}
833
/// Returns `value` limited to at most `max_chars` Unicode scalar values.
///
/// Input that already fits is returned unchanged. Longer input is cut on a
/// character boundary and suffixed with `"..."`; the suffix is not counted
/// against `max_chars`.
fn truncate_chars(value: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (when one exists)
    // is exactly where the kept prefix ends. Stopping there avoids walking the
    // rest of a long input just to count it, and lets the prefix be copied as
    // one slice instead of being rebuilt character by character.
    match value.char_indices().nth(max_chars) {
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + 3);
            truncated.push_str(&value[..cut]);
            truncated.push_str("...");
            truncated
        }
        None => value.to_string(),
    }
}
840
841#[cfg(test)]
842mod tests {
843    use super::*;
844    use bamboo_agent_core::TokenBudgetUsage;
845    use bamboo_domain::{FunctionCall, TaskItem, TaskItemStatus, TaskList, ToolCall};
846    use chrono::Utc;
847
848    fn make_budget() -> TokenBudget {
849        TokenBudget {
850            max_context_tokens: 1000,
851            max_output_tokens: 100,
852            strategy: BudgetStrategy::Hybrid {
853                window_size: 20,
854                enable_summarization: true,
855            },
856            safety_margin: 0,
857            compression_trigger_percent: 50,
858            compression_target_percent: 20,
859            working_reserve_tokens: 0,
860            fallback_trigger_percent: 75,
861            prompt_cache_min_tool_output_chars: 1_200,
862            prompt_cache_head_chars: 280,
863            prompt_cache_tail_chars: 180,
864            prompt_cache_recent_user_turns: 2,
865            prompt_cache_recent_tool_chains: 2,
866            max_tool_output_tokens: 0,
867        }
868    }
869
870    fn make_session_with_pressure() -> Session {
871        let mut session = Session::new("compression-hysteresis", "gpt-4o-mini");
872        session.token_budget = Some(make_budget());
873        session.add_message(Message::system("system"));
874        for i in 0..3 {
875            session.add_message(Message::user(format!(
876                "User message {i}: {}",
877                "alpha beta gamma delta epsilon ".repeat(2)
878            )));
879            session.add_message(Message::assistant(
880                format!(
881                    "Assistant message {i}: {}",
882                    "work log decisions next steps ".repeat(2)
883                ),
884                None,
885            ));
886        }
887        session
888    }
889
890    #[test]
891    fn context_window_usage_percent_uses_context_window_denominator() {
892        assert_eq!(context_window_usage_percent(0, 0), 0.0);
893        assert_eq!(context_window_usage_percent(500, 1000), 50.0);
894    }
895
896    #[test]
897    fn estimate_context_compression_exposure_crosses_trigger_when_usage_is_high_enough() {
898        let mut session = make_session_with_pressure();
899        if let Some(budget) = session.token_budget.as_mut() {
900            budget.compression_trigger_percent = 10;
901        }
902        let exposure = estimate_context_compression_exposure(
903            &session,
904            "gpt-4o-mini",
905            session.token_budget.as_ref(),
906        );
907        assert!(exposure.active_usage_percent >= 10.0);
908        assert!(exposure.should_expose_tool);
909    }
910
911    #[test]
912    fn estimate_context_compression_exposure_stays_below_trigger_when_usage_is_low() {
913        let mut session = make_session_with_pressure();
914        if let Some(budget) = session.token_budget.as_mut() {
915            budget.compression_trigger_percent = 99;
916        }
917
918        let exposure = estimate_context_compression_exposure(
919            &session,
920            "gpt-4o-mini",
921            session.token_budget.as_ref(),
922        );
923
924        assert!(exposure.active_usage_percent < 99.0);
925        assert!(!exposure.should_expose_tool);
926    }
927
928    #[test]
929    fn build_summary_prompt_includes_task_list_and_state_sections() {
930        let mut session = Session::new("summary-prompt", "gpt-4o-mini");
931        session.set_task_list(TaskList {
932            session_id: session.id.clone(),
933            title: "Task List".to_string(),
934            items: vec![
935                TaskItem {
936                    id: "task_1".to_string(),
937                    description: "检查 51% 又回落到 50% 的触发逻辑".to_string(),
938                    status: TaskItemStatus::InProgress,
939                    depends_on: Vec::new(),
940                    notes: "避免刚压缩完又立刻再次压缩".to_string(),
941                    ..TaskItem::default()
942                },
943                TaskItem {
944                    id: "task_2".to_string(),
945                    description: "重写 summarizer prompt 并纳入 task list".to_string(),
946                    status: TaskItemStatus::Pending,
947                    depends_on: Vec::new(),
948                    notes: String::new(),
949                    ..TaskItem::default()
950                },
951            ],
952            created_at: Utc::now(),
953            updated_at: Utc::now(),
954        });
955        let prompt = build_summary_prompt(
956            &session,
957            &[
958                Message::user("继续修复 context compression"),
959                Message::assistant("先分析 trigger / target / summary", None),
960            ],
961            Some("old summary"),
962        );
963
964        assert!(prompt.contains("## Current Task List"));
965        assert!(prompt.contains("Current active objective"));
966        assert!(prompt.contains("Requirement checklist"));
967        assert!(prompt.contains("Active tasks"));
968        assert!(prompt.contains("Completed tasks"));
969        assert!(prompt.contains("Obsolete or superseded tasks"));
970        assert!(prompt.contains("检查 51% 又回落到 50% 的触发逻辑"));
971        assert!(prompt.contains("old summary"));
972    }
973
974    #[test]
975    fn forced_plan_keeps_last_three_user_messages_active() {
976        let budget = TokenBudget {
977            max_context_tokens: 1200,
978            max_output_tokens: 100,
979            strategy: BudgetStrategy::Hybrid {
980                window_size: 20,
981                enable_summarization: true,
982            },
983            safety_margin: 0,
984            compression_trigger_percent: 80,
985            compression_target_percent: 20,
986            working_reserve_tokens: 0,
987            fallback_trigger_percent: 75,
988            prompt_cache_min_tool_output_chars: 1_200,
989            prompt_cache_head_chars: 280,
990            prompt_cache_tail_chars: 180,
991            prompt_cache_recent_user_turns: 2,
992            prompt_cache_recent_tool_chains: 2,
993            max_tool_output_tokens: 0,
994        };
995        let mut session = Session::new("keep-last-three-user-turns", "gpt-4o-mini");
996        session.token_budget = Some(budget.clone());
997        session.add_message(Message::system("system"));
998        for i in 0..6 {
999            session.add_message(Message::user(format!(
1000                "U{i}: {}",
1001                "alpha beta gamma ".repeat(8)
1002            )));
1003            session.add_message(Message::assistant(
1004                format!("A{i}: {}", "analysis plan steps ".repeat(8)),
1005                None,
1006            ));
1007        }
1008
1009        let plan = build_forced_compression_plan_with_summary(
1010            &session,
1011            "gpt-4o-mini",
1012            Some(&budget),
1013            "summary".to_string(),
1014            CompressionTriggerType::CriticalOverflow,
1015        )
1016        .expect("forced plan should build");
1017
1018        let compressed_ids = plan
1019            .compressed_message_ids
1020            .iter()
1021            .map(String::as_str)
1022            .collect::<HashSet<_>>();
1023        let kept_user_contents = session
1024            .messages
1025            .iter()
1026            .filter(|message| !matches!(message.role, bamboo_agent_core::Role::System))
1027            .filter(|message| !compressed_ids.contains(message.id.as_str()))
1028            .filter(|message| matches!(message.role, bamboo_agent_core::Role::User))
1029            .map(|message| message.content.clone())
1030            .collect::<Vec<_>>();
1031
1032        assert!(
1033            kept_user_contents.len() >= 3,
1034            "expected to keep at least 3 user messages, got {}",
1035            kept_user_contents.len()
1036        );
1037        assert!(kept_user_contents
1038            .iter()
1039            .any(|content| content.starts_with("U3:")));
1040        assert!(kept_user_contents
1041            .iter()
1042            .any(|content| content.starts_with("U4:")));
1043        assert!(kept_user_contents
1044            .iter()
1045            .any(|content| content.starts_with("U5:")));
1046    }
1047
1048    #[test]
1049    fn estimate_exposure_prefers_persisted_budget_usage_when_higher() {
1050        let mut session = Session::new("persisted-usage", "gpt-4o-mini");
1051        session.token_budget = Some(TokenBudget {
1052            max_context_tokens: 100_000,
1053            max_output_tokens: 1_000,
1054            strategy: BudgetStrategy::Hybrid {
1055                window_size: 20,
1056                enable_summarization: true,
1057            },
1058            safety_margin: 0,
1059            compression_trigger_percent: 80,
1060            compression_target_percent: 50,
1061            working_reserve_tokens: 0,
1062            fallback_trigger_percent: 75,
1063            prompt_cache_min_tool_output_chars: 1_200,
1064            prompt_cache_head_chars: 280,
1065            prompt_cache_tail_chars: 180,
1066            prompt_cache_recent_user_turns: 2,
1067            prompt_cache_recent_tool_chains: 2,
1068            max_tool_output_tokens: 0,
1069        });
1070        session.add_message(Message::system("system"));
1071        session.add_message(Message::user("short"));
1072        session.add_message(Message::assistant("short", None));
1073        session.add_message(Message::user("follow-up"));
1074        session.add_message(Message::assistant("reply", None));
1075        session.token_usage = Some(TokenBudgetUsage {
1076            system_tokens: 100,
1077            summary_tokens: 0,
1078            window_tokens: 95_900,
1079            total_tokens: 96_000,
1080            max_context_tokens: 100_000,
1081            budget_limit: 10_000,
1082            truncation_occurred: true,
1083            segments_removed: 12,
1084            prompt_cached_tool_outputs: 0,
1085            thinking_tokens: 0,
1086            cache_read_input_tokens: 0,
1087        });
1088
1089        let exposure = estimate_context_compression_exposure(
1090            &session,
1091            "gpt-4o-mini",
1092            session.token_budget.as_ref(),
1093        );
1094
1095        assert!(
1096            exposure.active_usage_percent >= 96.0,
1097            "expected persisted context-window usage to drive exposure, got {}",
1098            exposure.active_usage_percent
1099        );
1100        assert!(exposure.should_expose_tool);
1101    }
1102
1103    #[test]
1104    fn never_compress_messages_are_excluded_from_summarize_set() {
1105        let budget = TokenBudget {
1106            max_context_tokens: 1200,
1107            max_output_tokens: 100,
1108            strategy: BudgetStrategy::Hybrid {
1109                window_size: 20,
1110                enable_summarization: true,
1111            },
1112            safety_margin: 0,
1113            compression_trigger_percent: 80,
1114            compression_target_percent: 20,
1115            working_reserve_tokens: 0,
1116            fallback_trigger_percent: 75,
1117            prompt_cache_min_tool_output_chars: 1_200,
1118            prompt_cache_head_chars: 280,
1119            prompt_cache_tail_chars: 180,
1120            prompt_cache_recent_user_turns: 2,
1121            prompt_cache_recent_tool_chains: 2,
1122            max_tool_output_tokens: 0,
1123        };
1124        let mut session = Session::new("never-compress-test", "gpt-4o-mini");
1125        session.token_budget = Some(budget.clone());
1126        session.add_message(Message::system("system"));
1127
1128        // Old user message that should be summarized
1129        session.add_message(Message::user("Old question about X"));
1130        session.add_message(Message::assistant("Old answer about X", None));
1131
1132        // Protected user message (never_compress = true)
1133        let mut protected = Message::user("Critical context that must survive");
1134        protected.never_compress = true;
1135        session.add_message(protected);
1136        session.add_message(Message::assistant("Response to critical", None));
1137
1138        // Recent user messages that anchor the keep window
1139        for i in 0..4 {
1140            session.add_message(Message::user(format!(
1141                "Recent U{i}: {}",
1142                "padding text to fill budget ".repeat(6)
1143            )));
1144            session.add_message(Message::assistant(
1145                format!("Recent A{i}: {}", "reply padding text ".repeat(6)),
1146                None,
1147            ));
1148        }
1149
1150        let plan = build_forced_compression_plan_with_summary(
1151            &session,
1152            "gpt-4o-mini",
1153            Some(&budget),
1154            "summary".to_string(),
1155            CompressionTriggerType::Auto,
1156        )
1157        .expect("plan should build");
1158
1159        let compressed_ids: HashSet<&str> = plan
1160            .compressed_message_ids
1161            .iter()
1162            .map(String::as_str)
1163            .collect();
1164
1165        // Find the never_compress message
1166        let protected_msg = session
1167            .messages
1168            .iter()
1169            .find(|m| m.never_compress)
1170            .expect("should find the protected message");
1171
1172        assert!(
1173            !compressed_ids.contains(protected_msg.id.as_str()),
1174            "never_compress message should NOT be in the compressed set"
1175        );
1176    }
1177
1178    #[test]
1179    fn skill_tool_chain_messages_are_protected_from_compression() {
1180        let budget = TokenBudget {
1181            max_context_tokens: 1200,
1182            max_output_tokens: 100,
1183            strategy: BudgetStrategy::Hybrid {
1184                window_size: 20,
1185                enable_summarization: true,
1186            },
1187            safety_margin: 0,
1188            compression_trigger_percent: 80,
1189            compression_target_percent: 20,
1190            working_reserve_tokens: 0,
1191            fallback_trigger_percent: 75,
1192            prompt_cache_min_tool_output_chars: 1_200,
1193            prompt_cache_head_chars: 280,
1194            prompt_cache_tail_chars: 180,
1195            prompt_cache_recent_user_turns: 2,
1196            prompt_cache_recent_tool_chains: 2,
1197            max_tool_output_tokens: 0,
1198        };
1199        let mut session = Session::new("skill-chain-test", "gpt-4o-mini");
1200        session.token_budget = Some(budget.clone());
1201        session.add_message(Message::system("system"));
1202
1203        // Skill tool chain (load_skill + read_skill_resource)
1204        let mut skill_call = Message::assistant(String::new(), None);
1205        skill_call.tool_calls = Some(vec![ToolCall {
1206            id: "tc-skill".to_string(),
1207            tool_type: "function".to_string(),
1208            function: FunctionCall {
1209                name: "load_skill".to_string(),
1210                arguments: r#"{"skill_id":"my-skill"}"#.to_string(),
1211            },
1212        }]);
1213        session.add_message(skill_call);
1214
1215        let mut skill_result = Message::tool_result("tc-skill", "skill loaded");
1216        skill_result.tool_success = Some(true);
1217        session.add_message(skill_result);
1218
1219        // Regular messages to fill budget
1220        for i in 0..6 {
1221            session.add_message(Message::user(format!(
1222                "U{i}: {}",
1223                "alpha beta gamma delta ".repeat(8)
1224            )));
1225            session.add_message(Message::assistant(
1226                format!("A{i}: {}", "analysis steps plan ".repeat(8)),
1227                None,
1228            ));
1229        }
1230
1231        let plan = build_forced_compression_plan_with_summary(
1232            &session,
1233            "gpt-4o-mini",
1234            Some(&budget),
1235            "summary".to_string(),
1236            CompressionTriggerType::Auto,
1237        )
1238        .expect("plan should build");
1239
1240        let compressed_ids: HashSet<&str> = plan
1241            .compressed_message_ids
1242            .iter()
1243            .map(String::as_str)
1244            .collect();
1245
1246        // Skill tool chain messages should not be compressed
1247        let skill_messages: Vec<&Message> = session
1248            .messages
1249            .iter()
1250            .filter(|m| {
1251                m.tool_calls
1252                    .as_ref()
1253                    .is_some_and(|calls| calls.iter().any(|c| c.function.name == "load_skill"))
1254                    || m.tool_call_id.as_deref() == Some("tc-skill")
1255            })
1256            .collect();
1257
1258        for msg in &skill_messages {
1259            assert!(
1260                !compressed_ids.contains(msg.id.as_str()),
1261                "skill tool chain message {} should NOT be compressed",
1262                msg.id
1263            );
1264        }
1265    }
1266
1267    #[test]
1268    fn recovery_message_returns_none_for_empty_messages() {
1269        let session = Session::new("recovery-empty", "model");
1270        let result = build_post_compaction_recovery_message(&[], &session);
1271        assert!(result.is_none());
1272    }
1273
1274    #[test]
1275    fn recovery_message_has_never_compress_flag() {
1276        let mut session = Session::new("recovery-flag", "model");
1277        let messages = vec![Message::assistant("no decisions here", None)];
1278        session.set_task_list(TaskList {
1279            session_id: session.id.clone(),
1280            title: "Tasks".to_string(),
1281            items: vec![TaskItem {
1282                id: "t1".to_string(),
1283                description: "Active task".to_string(),
1284                status: TaskItemStatus::InProgress,
1285                ..TaskItem::default()
1286            }],
1287            created_at: Utc::now(),
1288            updated_at: Utc::now(),
1289        });
1290        let recovery = build_post_compaction_recovery_message(&messages, &session)
1291            .expect("should return recovery message");
1292        assert!(recovery.never_compress);
1293        assert!(recovery.content.contains("[post-compaction-recovery]"));
1294    }
1295
1296    #[test]
1297    fn recovery_message_extracts_file_paths_from_tool_calls() {
1298        let session = Session::new("recovery-files", "model");
1299        let mut write_call = Message::assistant("writing file", None);
1300        write_call.tool_calls = Some(vec![ToolCall {
1301            id: "tc1".to_string(),
1302            tool_type: "function".to_string(),
1303            function: FunctionCall {
1304                name: "Write".to_string(),
1305                arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#.to_string(),
1306            },
1307        }]);
1308        let mut edit_call = Message::assistant("editing file", None);
1309        edit_call.tool_calls = Some(vec![ToolCall {
1310            id: "tc2".to_string(),
1311            tool_type: "function".to_string(),
1312            function: FunctionCall {
1313                name: "Edit".to_string(),
1314                arguments: r#"{"file_path":"/lib/utils.rs","old":"x","new":"y"}"#.to_string(),
1315            },
1316        }]);
1317        let messages = vec![write_call, edit_call];
1318
1319        let recovery = build_post_compaction_recovery_message(&messages, &session)
1320            .expect("should return recovery");
1321        assert!(recovery.content.contains("/src/main.rs"));
1322        assert!(recovery.content.contains("/lib/utils.rs"));
1323        assert!(recovery.content.contains("Recently Modified Files"));
1324    }
1325
1326    #[test]
1327    fn recovery_message_includes_active_tasks() {
1328        let mut session = Session::new("recovery-tasks", "model");
1329        session.set_task_list(TaskList {
1330            session_id: session.id.clone(),
1331            title: "Tasks".to_string(),
1332            items: vec![
1333                TaskItem {
1334                    id: "t1".to_string(),
1335                    description: "Fix auth middleware".to_string(),
1336                    status: TaskItemStatus::InProgress,
1337                    ..TaskItem::default()
1338                },
1339                TaskItem {
1340                    id: "t2".to_string(),
1341                    description: "Add tests".to_string(),
1342                    status: TaskItemStatus::Pending,
1343                    ..TaskItem::default()
1344                },
1345                TaskItem {
1346                    id: "t3".to_string(),
1347                    description: "Done task".to_string(),
1348                    status: TaskItemStatus::Completed,
1349                    ..TaskItem::default()
1350                },
1351            ],
1352            created_at: Utc::now(),
1353            updated_at: Utc::now(),
1354        });
1355        let messages = vec![Message::assistant("some work", None)];
1356
1357        let recovery = build_post_compaction_recovery_message(&messages, &session)
1358            .expect("should return recovery");
1359        assert!(recovery.content.contains("Active Tasks"));
1360        assert!(recovery.content.contains("Fix auth middleware"));
1361        assert!(recovery.content.contains("Add tests"));
1362        // Completed tasks should NOT appear in active tasks
1363        assert!(!recovery.content.contains("Done task"));
1364    }
1365
1366    #[test]
1367    fn apply_compression_plan_injects_recovery_message() {
1368        let budget = TokenBudget {
1369            max_context_tokens: 1200,
1370            max_output_tokens: 100,
1371            strategy: BudgetStrategy::Hybrid {
1372                window_size: 20,
1373                enable_summarization: true,
1374            },
1375            safety_margin: 0,
1376            compression_trigger_percent: 80,
1377            compression_target_percent: 20,
1378            working_reserve_tokens: 0,
1379            fallback_trigger_percent: 75,
1380            prompt_cache_min_tool_output_chars: 1_200,
1381            prompt_cache_head_chars: 280,
1382            prompt_cache_tail_chars: 180,
1383            prompt_cache_recent_user_turns: 2,
1384            prompt_cache_recent_tool_chains: 2,
1385            max_tool_output_tokens: 0,
1386        };
1387        let mut session = Session::new("recovery-inject", "gpt-4o-mini");
1388        session.token_budget = Some(budget.clone());
1389        session.add_message(Message::system("system"));
1390
1391        // Old messages with tool calls containing file paths
1392        let mut write_msg = Message::assistant("writing", None);
1393        write_msg.tool_calls = Some(vec![ToolCall {
1394            id: "tc-w".to_string(),
1395            tool_type: "function".to_string(),
1396            function: FunctionCall {
1397                name: "Write".to_string(),
1398                arguments: r#"{"file_path":"/src/lib.rs","content":"pub fn hello() {}"}"#
1399                    .to_string(),
1400            },
1401        }]);
1402        session.add_message(Message::user("Write the file"));
1403        session.add_message(write_msg);
1404
1405        // Fill with enough messages to force compression
1406        for i in 0..6 {
1407            session.add_message(Message::user(format!(
1408                "U{i}: {}",
1409                "alpha beta gamma delta ".repeat(8)
1410            )));
1411            session.add_message(Message::assistant(
1412                format!("A{i}: {}", "analysis plan ".repeat(8)),
1413                None,
1414            ));
1415        }
1416
1417        let plan = build_forced_compression_plan_with_summary(
1418            &session,
1419            "gpt-4o-mini",
1420            Some(&budget),
1421            "summary text".to_string(),
1422            CompressionTriggerType::Auto,
1423        )
1424        .expect("plan should build");
1425
1426        assert!(plan.compressed_message_ids.len() > 0);
1427
1428        let compressed_count = apply_compression_plan(&mut session, plan);
1429        assert!(compressed_count > 0);
1430
1431        // Verify recovery message was injected
1432        let has_recovery = session.messages.iter().any(|m| {
1433            m.never_compress
1434                && m.content.contains("[post-compaction-recovery]")
1435                && m.content.contains("/src/lib.rs")
1436        });
1437        assert!(
1438            has_recovery,
1439            "session should contain a post-compaction recovery message with the file path"
1440        );
1441    }
1442
1443    #[test]
1444    fn summary_quality_full_coverage_when_all_files_mentioned() {
1445        let messages = vec![{
1446            let mut m = Message::assistant("writing", None);
1447            m.tool_calls = Some(vec![ToolCall {
1448                id: "tc1".to_string(),
1449                tool_type: "function".to_string(),
1450                function: FunctionCall {
1451                    name: "Write".to_string(),
1452                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1453                        .to_string(),
1454                },
1455            }]);
1456            m
1457        }];
1458        let summary = "Modified /src/main.rs to add main function";
1459        let quality = validate_summary_quality(summary, &messages);
1460        assert!(
1461            quality.file_coverage >= 0.99,
1462            "file_coverage should be ~1.0, got {:.2}",
1463            quality.file_coverage
1464        );
1465    }
1466
1467    #[test]
1468    fn summary_quality_zero_coverage_when_no_files_mentioned() {
1469        let messages = vec![{
1470            let mut m = Message::assistant("writing", None);
1471            m.tool_calls = Some(vec![ToolCall {
1472                id: "tc1".to_string(),
1473                tool_type: "function".to_string(),
1474                function: FunctionCall {
1475                    name: "Write".to_string(),
1476                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1477                        .to_string(),
1478                },
1479            }]);
1480            m
1481        }];
1482        let summary = "Summary that mentions nothing about files";
1483        let quality = validate_summary_quality(summary, &messages);
1484        assert!(
1485            quality.file_coverage < 0.01,
1486            "file_coverage should be ~0.0, got {:.2}",
1487            quality.file_coverage
1488        );
1489    }
1490
1491    #[test]
1492    fn summary_quality_handles_empty_messages() {
1493        let quality = validate_summary_quality("some summary", &[]);
1494        assert_eq!(quality.file_coverage, 1.0);
1495        assert_eq!(quality.decision_coverage, 1.0);
1496    }
1497}