Skip to main content

bamboo_compression/
compression_tooling.rs

1use crate::counter::{TiktokenTokenCounter, TokenCounter};
2use crate::limits::{create_budget_for_model, ModelLimitsRegistry};
3use crate::{BudgetStrategy, TokenBudget};
4use bamboo_domain::MessagePhase;
5use bamboo_domain::{
6    CompressionEvent, CompressionTriggerType, ConversationSummary, Message, Session,
7};
8
9/// Checks if a message is part of a skill tool chain (load_skill / read_skill_resource).
10fn is_skill_tool_chain_message(message: &Message) -> bool {
11    message.tool_calls.as_ref().is_some_and(|calls| {
12        calls.iter().any(|call| {
13            matches!(
14                call.function.name.as_str(),
15                "load_skill" | "read_skill_resource"
16            )
17        })
18    })
19}
20use chrono::Utc;
21use std::collections::HashSet;
22
/// Structured reason why a compression plan could not be built.
///
/// Implements [`std::fmt::Display`] for human-readable diagnostics and
/// [`std::error::Error`] so it composes with `?`, `Box<dyn Error>` and other
/// error-handling utilities.
#[derive(Debug, Clone)]
pub enum CompressionPlanError {
    /// The exposure gate (threshold not reached) prevented building.
    ExposureGateNotMet {
        /// Context usage, in percent, measured when the gate was evaluated.
        usage_percent: f64,
        /// Trigger percentage configured on the budget.
        trigger_percent: u8,
    },
    /// No active messages in the session.
    NoActiveMessages,
    /// Not enough non-system messages to compress (need >=3).
    NotEnoughMessages {
        /// Number of active non-system messages that were found.
        non_system_count: usize,
    },
    /// Nothing to compress after anchor/keep splitting.
    NothingToCompress {
        /// Index (within the non-system messages) where the keep set starts.
        anchor_index: usize,
        /// Number of active non-system messages that were considered.
        non_system_count: usize,
    },
}

impl std::fmt::Display for CompressionPlanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExposureGateNotMet {
                usage_percent,
                trigger_percent,
            } => write!(
                f,
                "compression threshold not reached (usage={usage_percent:.1}%, trigger={trigger_percent}%)"
            ),
            Self::NoActiveMessages => f.write_str("no active messages to compress"),
            Self::NotEnoughMessages { non_system_count } => write!(
                f,
                "not enough non-system messages to compress ({non_system_count}, need >=3)"
            ),
            Self::NothingToCompress {
                anchor_index,
                non_system_count,
            } => write!(
                f,
                "nothing to compress after anchor/keep splitting (anchor_index={anchor_index}, non_system={non_system_count})"
            ),
        }
    }
}

// No variant wraps another error, so the default `source()` (None) is correct.
impl std::error::Error for CompressionPlanError {}
70
71/// Metadata about current context pressure, used to decide when compression
72/// should be requested by host-side control flow.
73#[derive(Debug, Clone)]
74pub struct ContextCompressionExposure {
75    pub budget: TokenBudget,
76    pub active_tokens: u32,
77    pub active_usage_percent: f64,
78    pub active_usage_percent_rounded: u8,
79    pub should_expose_tool: bool,
80}
81
82/// A compression plan describing which active historical messages should be
83/// archived and summarized.
84#[derive(Debug, Clone)]
85pub struct CompressionPlan {
86    pub compressed_message_ids: Vec<String>,
87    pub messages_to_summarize: Vec<Message>,
88    pub summary_tokens: u32,
89    pub summary_content: String,
90    pub active_usage_before_percent: f64,
91    pub active_usage_after_percent: f64,
92    pub trigger_percent: u8,
93    pub target_percent: u8,
94    pub segments_removed: usize,
95    pub trigger_type: CompressionTriggerType,
96    pub compression_ratio: f64,
97    pub model_used: Option<String>,
98    pub latency_ms: u64,
99}
100
/// Express `total_tokens` as a percentage of `context_window_tokens`.
///
/// Returns `0.0` for an empty (zero-sized) context window. The result is not
/// capped, so values above `100.0` are possible when the window is exceeded.
pub fn context_window_usage_percent(total_tokens: u32, context_window_tokens: u32) -> f64 {
    match context_window_tokens {
        0 => 0.0,
        window => (f64::from(total_tokens) / f64::from(window)) * 100.0,
    }
}
107
/// Normalize a configured trigger percentage to a usable `f64`.
///
/// Values in `1..=100` pass through unchanged; `0` and anything above `100`
/// map to `100.0`.
pub fn normalized_trigger_percent(trigger_percent: u8) -> f64 {
    if (1..=100).contains(&trigger_percent) {
        f64::from(trigger_percent)
    } else {
        100.0
    }
}
115
116/// Estimate whether context pressure has crossed the configured threshold for
117/// compression eligibility.
118pub fn estimate_context_compression_exposure(
119    session: &Session,
120    model_name: &str,
121    configured_budget: Option<&TokenBudget>,
122) -> ContextCompressionExposure {
123    // When a budget was already resolved upstream (the production path — see
124    // `resolve_token_budget`, which now also caches it on `session.token_budget`,
125    // issue #20 bug 1), use it directly. Only when none is available do we fall
126    // back to a model-derived budget. No `model_limits.json` registry is in
127    // scope synchronously here, so this fallback resolves to the global default
128    // rather than silently fabricating an empty override registry (#20 bug 2).
129    let budget = configured_budget.cloned().unwrap_or_else(|| {
130        create_budget_for_model(
131            model_name,
132            BudgetStrategy::default(),
133            &ModelLimitsRegistry::new(),
134        )
135    });
136    let counter = TiktokenTokenCounter::default();
137    let active_messages = active_messages_for_budget(session);
138    let active_message_tokens = counter.count_messages(&active_messages);
139    let summary_tokens = session
140        .conversation_summary
141        .as_ref()
142        .map(|summary| counter.count_messages(&[compression_summary_message(&summary.content)]))
143        .unwrap_or(0);
144    let active_tokens = active_message_tokens.saturating_add(summary_tokens);
145    // Use context window as the denominator for a single, provider-aligned
146    // pressure scale across backend and frontend.
147    let context_window = budget.max_context_tokens;
148    let estimated_usage = context_window_usage_percent(active_tokens, context_window);
149    let usage = session
150        .token_usage
151        .as_ref()
152        .and_then(|token_usage| {
153            let denominator = if token_usage.max_context_tokens > 0 {
154                token_usage.max_context_tokens
155            } else if token_usage.budget_limit > 0 {
156                // Legacy payload compatibility.
157                token_usage.budget_limit
158            } else {
159                context_window
160            };
161            (denominator > 0).then_some(context_window_usage_percent(
162                token_usage.total_tokens,
163                denominator,
164            ))
165        })
166        .map(|persisted_usage| persisted_usage.max(estimated_usage))
167        .unwrap_or(estimated_usage);
168
169    let rounded = usage.clamp(0.0, 100.0).round() as u8;
170    let trigger_tokens = budget.compression_trigger_context_tokens();
171    let trigger_percent = if budget.max_context_tokens > 0 {
172        (trigger_tokens as f64 / budget.max_context_tokens as f64) * 100.0
173    } else {
174        0.0
175    };
176    let threshold_reached = usage >= trigger_percent;
177
178    // Check non-system message count to stay consistent with the plan
179    // building requirement of >=3 non-system messages.  Using
180    // active_messages.len() would include system messages and expose the
181    // tool even when plan building would immediately fail.
182    let non_system_count = active_messages
183        .iter()
184        .filter(|m| !matches!(m.role, bamboo_domain::Role::System))
185        .count();
186
187    let should_expose_tool = threshold_reached && non_system_count >= 3;
188
189    ContextCompressionExposure {
190        budget,
191        active_tokens,
192        active_usage_percent: usage,
193        active_usage_percent_rounded: rounded,
194        should_expose_tool,
195    }
196}
197
198/// Build a compression plan that archives older active messages and replaces
199/// them with a caller-provided summary.
200pub fn build_compression_plan_with_summary(
201    session: &Session,
202    model_name: &str,
203    configured_budget: Option<&TokenBudget>,
204    summary_content: String,
205) -> Result<CompressionPlan, CompressionPlanError> {
206    build_compression_plan_with_summary_internal(
207        session,
208        model_name,
209        configured_budget,
210        summary_content,
211        true,
212        CompressionTriggerType::Auto,
213    )
214}
215
216/// Build a compression plan while bypassing "tool exposure" gating.
217///
218/// This is intended for host-enforced fallback paths when context pressure is
219/// critically high and compression must be attempted regardless of the normal
220/// trigger gate.
221pub fn build_forced_compression_plan_with_summary(
222    session: &Session,
223    model_name: &str,
224    configured_budget: Option<&TokenBudget>,
225    summary_content: String,
226    trigger_type: CompressionTriggerType,
227) -> Result<CompressionPlan, CompressionPlanError> {
228    build_compression_plan_with_summary_internal(
229        session,
230        model_name,
231        configured_budget,
232        summary_content,
233        false,
234        trigger_type,
235    )
236}
237
238fn build_compression_plan_with_summary_internal(
239    session: &Session,
240    model_name: &str,
241    configured_budget: Option<&TokenBudget>,
242    summary_content: String,
243    require_exposure_gate: bool,
244    trigger_type: CompressionTriggerType,
245) -> Result<CompressionPlan, CompressionPlanError> {
246    let exposure = estimate_context_compression_exposure(session, model_name, configured_budget);
247    if require_exposure_gate && !exposure.should_expose_tool {
248        return Err(CompressionPlanError::ExposureGateNotMet {
249            usage_percent: exposure.active_usage_percent,
250            trigger_percent: exposure.budget.compression_trigger_percent,
251        });
252    }
253
254    let budget = &exposure.budget;
255    let counter = TiktokenTokenCounter::default();
256    let summary_message = compression_summary_message(&summary_content);
257    let summary_tokens = counter.count_messages(&[summary_message]);
258
259    let context_window = budget.max_context_tokens;
260    let target_limit = budget.compression_target_context_tokens();
261
262    let mut active_messages = active_messages_for_budget(session);
263    if active_messages.is_empty() {
264        tracing::debug!("compression plan: no active messages, cannot build plan");
265        return Err(CompressionPlanError::NoActiveMessages);
266    }
267
268    let system_messages: Vec<Message> = active_messages
269        .iter()
270        .filter(|m| matches!(m.role, bamboo_domain::Role::System))
271        .cloned()
272        .collect();
273    let system_tokens = counter.count_messages(&system_messages);
274    let reserved_non_window_tokens = system_tokens.saturating_add(summary_tokens);
275    let window_limit = target_limit.saturating_sub(reserved_non_window_tokens);
276
277    let non_system: Vec<Message> = active_messages
278        .drain(..)
279        .filter(|m| !matches!(m.role, bamboo_domain::Role::System))
280        .collect();
281
282    if non_system.len() < 3 {
283        tracing::debug!(
284            "compression plan: not enough non-system messages ({}), need at least 3",
285            non_system.len()
286        );
287        return Err(CompressionPlanError::NotEnoughMessages {
288            non_system_count: non_system.len(),
289        });
290    }
291
292    let user_indexes = non_system
293        .iter()
294        .enumerate()
295        .filter_map(|(index, message)| {
296            matches!(message.role, bamboo_domain::Role::User).then_some(index)
297        })
298        .collect::<Vec<_>>();
299    let keep_user_count = user_indexes.len().min(3);
300    let anchor_index = if keep_user_count > 0 {
301        user_indexes[user_indexes.len() - keep_user_count]
302    } else {
303        non_system
304            .iter()
305            .rposition(|m| matches!(m.role, bamboo_domain::Role::User))
306            .unwrap_or(non_system.len().saturating_sub(1))
307    };
308    let protected_user_ids: HashSet<String> = if keep_user_count > 0 {
309        user_indexes[user_indexes.len() - keep_user_count..]
310            .iter()
311            .filter_map(|idx| non_system.get(*idx))
312            .map(|message| message.id.clone())
313            .collect()
314    } else {
315        HashSet::new()
316    };
317
318    tracing::debug!(
319        "compression plan: context_window={}, target_limit={}, system_tokens={}, summary_tokens={}, window_limit={}, non_system_messages={}, keep_user_count={}, keep_from_index={}",
320        context_window, target_limit, system_tokens, summary_tokens, window_limit, non_system.len(), keep_user_count, anchor_index
321    );
322
323    // Keep the newest 3 user turns (or fewer if there are not enough user
324    // turns) as active context and summarize older history before that
325    // boundary. If budget is still too high, continue moving the oldest
326    // non-protected messages into the summarize set.
327    let mut messages_to_summarize = non_system[..anchor_index].to_vec();
328
329    // Protected messages must never be summarized — move them to the keep set.
330    let mut never_compress_ids: Vec<String> = messages_to_summarize
331        .iter()
332        .filter(|m| m.never_compress || is_skill_tool_chain_message(m))
333        .map(|m| m.id.clone())
334        .collect();
335
336    // Also protect tool result messages that correspond to skill tool calls.
337    let skill_call_ids: Vec<String> = messages_to_summarize
338        .iter()
339        .filter(|m| is_skill_tool_chain_message(m))
340        .flat_map(|m| m.tool_calls.iter().flatten().map(|c| c.id.clone()))
341        .collect();
342    if !skill_call_ids.is_empty() {
343        for m in &*messages_to_summarize {
344            if let Some(ref call_id) = m.tool_call_id {
345                if skill_call_ids.contains(call_id) && !never_compress_ids.contains(&m.id) {
346                    never_compress_ids.push(m.id.clone());
347                }
348            }
349        }
350    }
351
352    if !never_compress_ids.is_empty() {
353        messages_to_summarize.retain(|m| !never_compress_ids.contains(&m.id));
354    }
355
356    let non_system_count = non_system.len();
357    let mut messages_to_keep = non_system[anchor_index..].to_vec();
358    // Add never_compress / skill messages to the keep set.
359    for id in &never_compress_ids {
360        if let Some(msg) = non_system.iter().find(|m| &m.id == id) {
361            if !messages_to_keep.iter().any(|m| m.id == *id) {
362                messages_to_keep.push(msg.clone());
363            }
364        }
365    }
366
367    while !messages_to_keep.is_empty() {
368        let keep_tokens = counter.count_messages(&messages_to_keep);
369        if keep_tokens <= window_limit {
370            break;
371        }
372
373        let Some(remove_index) = messages_to_keep.iter().position(|message| {
374            !protected_user_ids.contains(message.id.as_str())
375                && !never_compress_ids.contains(&message.id)
376        }) else {
377            // Remaining messages are all protected; stop shrinking.
378            break;
379        };
380        let moved = messages_to_keep.remove(remove_index);
381        messages_to_summarize.push(moved);
382    }
383
384    if messages_to_summarize.is_empty() {
385        tracing::debug!(
386            "compression plan: messages_to_summarize is empty after anchor/keep splitting"
387        );
388        return Err(CompressionPlanError::NothingToCompress {
389            anchor_index,
390            non_system_count,
391        });
392    }
393
394    let compressed_message_ids = messages_to_summarize
395        .iter()
396        .map(|message| message.id.clone())
397        .collect::<Vec<_>>();
398
399    let keep_tokens = counter.count_messages(&messages_to_keep);
400    let active_before = exposure.active_usage_percent;
401    // Use context_window as denominator, consistent with
402    // estimate_context_compression_exposure().
403    let active_after = if context_window == 0 {
404        0.0
405    } else {
406        let after_total = reserved_non_window_tokens.saturating_add(keep_tokens);
407        (after_total as f64 / context_window as f64) * 100.0
408    };
409
410    // Count actual segments being compressed using the same segmenter that
411    // prepare_hybrid_context uses, so the segment count is accurate.
412    let segmenter = crate::segmenter::MessageSegmenter::new();
413    let segments_removed = segmenter.segment(messages_to_summarize.clone()).len();
414
415    Ok(CompressionPlan {
416        compressed_message_ids,
417        messages_to_summarize,
418        summary_tokens,
419        summary_content,
420        active_usage_before_percent: active_before,
421        active_usage_after_percent: active_after,
422        trigger_percent: budget.compression_trigger_percent,
423        target_percent: budget.compression_target_percent,
424        segments_removed,
425        trigger_type,
426        compression_ratio: 0.0,
427        model_used: None,
428        latency_ms: 0,
429    })
430}
431
432/// Apply a previously computed compression plan to the session.
433/// Extract recently modified files from tool calls in the given messages.
434pub(super) fn extract_recently_modified_files(messages: &[Message]) -> Vec<(String, String)> {
435    let mut files = Vec::new();
436    for message in messages {
437        if let Some(ref tool_calls) = message.tool_calls {
438            for call in tool_calls {
439                let tool_name = call.function.name.as_str();
440                if !matches!(tool_name, "Write" | "Edit" | "Bash") {
441                    continue;
442                }
443                let args = &call.function.arguments;
444                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(args) {
445                    if let Some(path) = parsed.get("file_path").and_then(|v| v.as_str()) {
446                        files.push((path.to_string(), tool_name.to_string()));
447                    } else if let Some(cmd) = parsed.get("command").and_then(|v| v.as_str()) {
448                        // Extract file paths from shell commands heuristically
449                        for part in cmd.split_whitespace() {
450                            if part.contains('/')
451                                && (part.ends_with(".rs")
452                                    || part.ends_with(".ts")
453                                    || part.ends_with(".js")
454                                    || part.ends_with(".toml")
455                                    || part.ends_with(".json")
456                                    || part.ends_with(".md"))
457                            {
458                                files.push((part.to_string(), "Bash".to_string()));
459                            }
460                        }
461                    }
462                }
463            }
464        }
465    }
466    files.truncate(10);
467    files
468}
469
470/// Extract key decision snippets from assistant messages.
471pub(super) fn extract_key_decisions(messages: &[Message], limit: usize) -> Vec<String> {
472    let decision_keywords = [
473        "decided to",
474        "approach is",
475        "use ",
476        "using ",
477        "we'll go with",
478        "the plan is",
479        "strategy:",
480        "solution:",
481        "chose to",
482        "switched to",
483        "refactored to",
484        "migrated to",
485        "replaced with",
486    ];
487    let mut decisions = Vec::new();
488    for message in messages {
489        if !matches!(message.role, bamboo_domain::Role::Assistant) {
490            continue;
491        }
492        let content = &message.content;
493        for line in content.lines() {
494            let line_lower = line.to_lowercase();
495            if decision_keywords.iter().any(|kw| line_lower.contains(kw)) {
496                let truncated: String = line.chars().take(200).collect();
497                decisions.push(truncated);
498                if decisions.len() >= limit {
499                    return decisions;
500                }
501            }
502        }
503    }
504    decisions
505}
506
507/// Build a post-compaction recovery message that preserves critical context
508/// from the compressed messages so the LLM can continue work without losing
509/// track of active files, tasks, and decisions.
510fn build_post_compaction_recovery_message(
511    compressed_messages: &[Message],
512    session: &Session,
513) -> Option<Message> {
514    if compressed_messages.is_empty() {
515        return None;
516    }
517
518    let mut sections = Vec::new();
519
520    // 1. Recently modified files
521    let files = extract_recently_modified_files(compressed_messages);
522    if !files.is_empty() {
523        let mut section = String::from("## Recently Modified Files\n");
524        for (path, tool) in &files {
525            section.push_str(&format!("- {} ({})\n", path, tool));
526        }
527        sections.push(section);
528    }
529
530    // 2. Active tasks from task list
531    if let Some(ref task_list) = session.task_list {
532        let active_items: Vec<_> = task_list
533            .items
534            .iter()
535            .filter(|item| !matches!(item.status, bamboo_domain::TaskItemStatus::Completed))
536            .collect();
537        if !active_items.is_empty() {
538            let mut section = String::from("## Active Tasks\n");
539            for item in active_items.iter().take(10) {
540                section.push_str(&format!("- [{:?}] {}\n", item.status, item.description));
541            }
542            sections.push(section);
543        }
544    }
545
546    // 3. Key decisions
547    let decisions = extract_key_decisions(compressed_messages, 5);
548    if !decisions.is_empty() {
549        let mut section = String::from("## Key Decisions\n");
550        for decision in &decisions {
551            section.push_str(&format!("- {}\n", decision));
552        }
553        sections.push(section);
554    }
555
556    if sections.is_empty() {
557        return None;
558    }
559
560    let mut content = String::from("[post-compaction-recovery]\nContext extracted from compressed messages for continued work.\n\n");
561    content.push_str(&sections.join("\n"));
562
563    let mut message = Message::assistant(content, None);
564    message.never_compress = true;
565    Some(message)
566}
567
/// Coverage ratios describing how much of the extracted context a summary
/// repeats verbatim. Both values lie in `0.0..=1.0`.
struct SummaryQualityMetrics {
    /// Share of extracted file paths that appear in the summary
    /// (`1.0` when no file was extracted).
    file_coverage: f64,
    /// Share of extracted decisions whose leading characters appear in the
    /// summary (`1.0` when no decision was extracted).
    decision_coverage: f64,
}
572
573fn validate_summary_quality(summary: &str, messages: &[Message]) -> SummaryQualityMetrics {
574    let files = extract_recently_modified_files(messages);
575    let decisions = extract_key_decisions(messages, 10);
576
577    let files_mentioned = files
578        .iter()
579        .filter(|(path, _)| summary.contains(path.as_str()))
580        .count();
581    let file_coverage = if files.is_empty() {
582        1.0
583    } else {
584        files_mentioned as f64 / files.len() as f64
585    };
586
587    let decisions_mentioned = decisions
588        .iter()
589        .filter(|d| {
590            let check_str: String = d.chars().take(50).collect();
591            summary.contains(&check_str)
592        })
593        .count();
594    let decision_coverage = if decisions.is_empty() {
595        1.0
596    } else {
597        decisions_mentioned as f64 / decisions.len() as f64
598    };
599
600    SummaryQualityMetrics {
601        file_coverage,
602        decision_coverage,
603    }
604}
605
606pub fn apply_compression_plan(session: &mut Session, plan: CompressionPlan) -> usize {
607    let compressed_ids: HashSet<&str> = plan
608        .compressed_message_ids
609        .iter()
610        .map(String::as_str)
611        .collect();
612
613    let mut changed_indexes = Vec::new();
614    for (index, message) in session.messages.iter_mut().enumerate() {
615        if message.compressed || !compressed_ids.contains(message.id.as_str()) {
616            continue;
617        }
618        message.compressed = true;
619        changed_indexes.push(index);
620    }
621
622    if changed_indexes.is_empty() {
623        return 0;
624    }
625
626    let event = CompressionEvent::new(
627        changed_indexes.len(),
628        plan.segments_removed,
629        plan.active_usage_before_percent,
630        plan.active_usage_after_percent,
631        plan.summary_tokens,
632        plan.trigger_type,
633        plan.compression_ratio,
634        plan.model_used.clone(),
635        plan.latency_ms,
636    );
637    let event_id = event.id.clone();
638    for index in changed_indexes {
639        session.messages[index].compressed_by_event_id = Some(event_id.clone());
640    }
641    session.compression_events.push(event);
642    session.conversation_summary = Some(ConversationSummary::new(
643        &plan.summary_content,
644        plan.compressed_message_ids.len(),
645        plan.summary_tokens,
646    ));
647
648    // Inject a post-compaction recovery message to preserve critical context
649    // from the compressed messages (files, tasks, decisions).
650    let compressed_messages: Vec<Message> = session
651        .messages
652        .iter()
653        .filter(|m| compressed_ids.contains(m.id.as_str()))
654        .cloned()
655        .collect();
656    if let Some(recovery) = build_post_compaction_recovery_message(&compressed_messages, session) {
657        // Insert just before the last user message, or at the end
658        let insert_pos = session
659            .messages
660            .iter()
661            .rposition(|m| matches!(m.role, bamboo_domain::Role::User) && !m.compressed)
662            .map(|pos| pos + 1)
663            .unwrap_or(session.messages.len());
664        session.messages.insert(insert_pos, recovery);
665    }
666
667    let quality = validate_summary_quality(&plan.summary_content, &compressed_messages);
668    if quality.file_coverage < 0.5 || quality.decision_coverage < 0.3 {
669        tracing::warn!(
670            "[{}] Summary quality: file_coverage={:.0}%, decision_coverage={:.0}%",
671            session.id,
672            quality.file_coverage * 100.0,
673            quality.decision_coverage * 100.0
674        );
675    }
676
677    // Instead of clearing token_usage entirely (which forces the next round
678    // to rely on heuristic estimates that don't account for tool schema
679    // tokens), recompute an approximate post-compression snapshot.  We
680    // preserve the context-window denominator from the previous usage snapshot
681    // so percentages stay consistent across rounds.
682    let counter = TiktokenTokenCounter::default();
683    let remaining_active: Vec<_> = session
684        .messages
685        .iter()
686        .filter(|m| !m.compressed)
687        .cloned()
688        .collect();
689    let system_msgs: Vec<_> = remaining_active
690        .iter()
691        .filter(|m| matches!(m.role, bamboo_domain::Role::System))
692        .cloned()
693        .collect();
694    let window_msgs: Vec<_> = remaining_active
695        .iter()
696        .filter(|m| !matches!(m.role, bamboo_domain::Role::System))
697        .cloned()
698        .collect();
699    let system_tokens = counter.count_messages(&system_msgs);
700    let new_summary_tokens = plan.summary_tokens;
701    let window_tokens = counter.count_messages(&window_msgs);
702    let total_tokens = system_tokens
703        .saturating_add(new_summary_tokens)
704        .saturating_add(window_tokens);
705    let previous_usage = session.token_usage.take();
706    let budget_limit = previous_usage
707        .as_ref()
708        .map(|u| {
709            if u.max_context_tokens > 0 {
710                u.max_context_tokens
711            } else {
712                u.budget_limit
713            }
714        })
715        .unwrap_or(0);
716    let max_context_tokens = previous_usage
717        .as_ref()
718        .map(|u| u.max_context_tokens)
719        .unwrap_or(0);
720    session.token_usage = Some(bamboo_domain::TokenBudgetUsage {
721        system_tokens,
722        summary_tokens: new_summary_tokens,
723        window_tokens,
724        total_tokens,
725        max_context_tokens,
726        budget_limit,
727        truncation_occurred: false,
728        segments_removed: 0,
729        prompt_cached_tool_outputs: 0,
730        prompt_cached_tool_tokens_saved: 0,
731        thinking_tokens: 0,
732        cache_read_input_tokens: 0,
733    });
734
735    session.updated_at = Utc::now();
736    plan.compressed_message_ids.len()
737}
738
739pub fn compression_summary_message(summary_content: &str) -> Message {
740    Message::system(format!(
741        "<!-- CONVERSATION_SUMMARY_START -->\n\
742         ## Previous Conversation Summary\n\
743         The following is compressed historical context for continuity only.\n\
744         It is background memory, not a new user request. Follow the current task list and recent messages over this summary when they conflict.\n\n\
745         {}\n\
746         <!-- CONVERSATION_SUMMARY_END -->",
747        summary_content
748    ))
749}
750
751pub fn active_messages_for_budget(session: &Session) -> Vec<Message> {
752    session
753        .messages
754        .iter()
755        .filter(|message| !message.compressed)
756        .cloned()
757        .collect()
758}
759
760pub fn summary_source_messages(session: &Session) -> Vec<Message> {
761    session
762        .messages
763        .iter()
764        .filter(|message| !message.compressed)
765        .filter(|message| !matches!(message.role, bamboo_domain::Role::System))
766        .cloned()
767        .collect()
768}
769
770pub fn build_summary_prompt(
771    session: &Session,
772    messages: &[Message],
773    existing_summary: Option<&str>,
774) -> String {
775    let mut content = String::new();
776    content.push_str(
777        "You are compressing conversation history for continued work. Produce a compact but reliable working-memory summary.\n\n",
778    );
779    content.push_str(
780        "Critical requirements:\n- First capture the in-flight work right before compression (what was being done, where, and with which tool/file)\n- Distinguish clearly between ACTIVE work, COMPLETED work, and OBSOLETE or superseded work\n- Do not restate old tasks as active unless they are still unresolved\n- The current task list is the source of truth for what is actively being worked on\n- Preserve constraints, decisions, file paths, code changes, errors, tool findings, blockers, and the next step\n- If earlier plans conflict with the current task list or newer messages, treat the earlier plans as obsolete or completed\n- Explicitly evaluate each clear user requirement (e.g. requirement 1, requirement 2) with a status and evidence\n- Return only summary text in the same language as the conversation\n\n",
781    );
782
783    if let Some(existing) = existing_summary.map(str::trim).filter(|s| !s.is_empty()) {
784        content.push_str("## Existing Summary\n");
785        content.push_str(existing);
786        content.push_str("\n\n");
787    }
788
789    let task_list_prompt = session.format_task_list_for_prompt();
790    if !task_list_prompt.trim().is_empty() {
791        content.push_str("## Current Task List\n");
792        content.push_str(task_list_prompt.trim());
793        content.push_str("\n\n");
794    }
795
796    content.push_str(
797        "## Required Output Sections\n1. Pre-compression in-flight work (what was being done immediately before compression)\n2. Current active objective\n3. Requirement checklist (Requirement | Status: completed/in_progress/pending/blocked/obsolete | Evidence)\n4. Active tasks\n5. Completed tasks\n6. Obsolete or superseded tasks\n7. Important context and constraints\n8. Files, code, and tool findings\n9. Open issues and next step\n\n",
798    );
799
800    content.push_str("## Messages To Compress\n\n");
801    for message in messages {
802        let role = match message.role {
803            bamboo_domain::Role::System => continue,
804            bamboo_domain::Role::User => "User",
805            bamboo_domain::Role::Assistant => match message.phase {
806                Some(MessagePhase::Commentary) => "Assistant Commentary",
807                Some(MessagePhase::FinalAnswer) => "Assistant Final",
808                None => "Assistant",
809            },
810            bamboo_domain::Role::Tool => "Tool Result",
811        };
812
813        content.push_str("### ");
814        content.push_str(role);
815        content.push('\n');
816        if let Some(tool_calls) = &message.tool_calls {
817            if !tool_calls.is_empty() {
818                let names = tool_calls
819                    .iter()
820                    .map(|call| call.function.name.as_str())
821                    .collect::<Vec<_>>()
822                    .join(", ");
823                content.push_str("Called tools: ");
824                content.push_str(&names);
825                content.push('\n');
826            }
827        }
828        if let Some(tool_call_id) = &message.tool_call_id {
829            content.push_str("Tool call id: ");
830            content.push_str(tool_call_id);
831            content.push('\n');
832        }
833        let snippet = truncate_chars(&message.content, 2000);
834        content.push_str(&snippet);
835        content.push_str("\n\n");
836    }
837
838    content.push_str(
839        "Return only the summary text. Be explicit about what is active now versus what is already done or no longer relevant.",
840    );
841    content
842}
843
/// Returns `value` limited to at most `max_chars` Unicode scalar values.
///
/// Input that already fits is returned unchanged. Longer input is cut after its
/// first `max_chars` characters and `"..."` is appended, so a truncated result
/// is three characters longer than `max_chars`.
fn truncate_chars(value: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (if there is one) is
    // exactly where to cut. Looking it up this way stops scanning at the limit
    // instead of counting every character of a potentially very long message,
    // and the offset always lies on a UTF-8 boundary, so the slice cannot panic.
    match value.char_indices().nth(max_chars) {
        None => value.to_string(),
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + 3);
            truncated.push_str(&value[..cut]);
            truncated.push_str("...");
            truncated
        }
    }
}
850
851#[cfg(test)]
852mod tests {
853    use super::*;
854    use bamboo_domain::TokenBudgetUsage;
855    use bamboo_domain::{FunctionCall, TaskItem, TaskItemStatus, TaskList, ToolCall};
856    use chrono::Utc;
857
858    fn make_budget() -> TokenBudget {
859        TokenBudget {
860            max_context_tokens: 1000,
861            max_output_tokens: 100,
862            strategy: BudgetStrategy::Hybrid {
863                window_size: 20,
864                enable_summarization: true,
865            },
866            safety_margin: 0,
867            compression_trigger_percent: 50,
868            compression_target_percent: 20,
869            working_reserve_tokens: 0,
870            fallback_trigger_percent: 75,
871            prompt_cache_min_tool_output_chars: 1_200,
872            prompt_cache_head_chars: 280,
873            prompt_cache_tail_chars: 180,
874            prompt_cache_recent_user_turns: 2,
875            prompt_cache_recent_tool_chains: 2,
876            max_tool_output_tokens: 0,
877        }
878    }
879
880    fn make_session_with_pressure() -> Session {
881        let mut session = Session::new("compression-hysteresis", "gpt-4o-mini");
882        session.token_budget = Some(make_budget());
883        session.add_message(Message::system("system"));
884        for i in 0..3 {
885            session.add_message(Message::user(format!(
886                "User message {i}: {}",
887                "alpha beta gamma delta epsilon ".repeat(2)
888            )));
889            session.add_message(Message::assistant(
890                format!(
891                    "Assistant message {i}: {}",
892                    "work log decisions next steps ".repeat(2)
893                ),
894                None,
895            ));
896        }
897        session
898    }
899
900    #[test]
901    fn context_window_usage_percent_uses_context_window_denominator() {
902        assert_eq!(context_window_usage_percent(0, 0), 0.0);
903        assert_eq!(context_window_usage_percent(500, 1000), 50.0);
904    }
905
906    #[test]
907    fn estimate_context_compression_exposure_crosses_trigger_when_usage_is_high_enough() {
908        let mut session = make_session_with_pressure();
909        if let Some(budget) = session.token_budget.as_mut() {
910            budget.compression_trigger_percent = 10;
911        }
912        let exposure = estimate_context_compression_exposure(
913            &session,
914            "gpt-4o-mini",
915            session.token_budget.as_ref(),
916        );
917        assert!(exposure.active_usage_percent >= 10.0);
918        assert!(exposure.should_expose_tool);
919    }
920
921    #[test]
922    fn estimate_context_compression_exposure_stays_below_trigger_when_usage_is_low() {
923        let mut session = make_session_with_pressure();
924        if let Some(budget) = session.token_budget.as_mut() {
925            budget.compression_trigger_percent = 99;
926        }
927
928        let exposure = estimate_context_compression_exposure(
929            &session,
930            "gpt-4o-mini",
931            session.token_budget.as_ref(),
932        );
933
934        assert!(exposure.active_usage_percent < 99.0);
935        assert!(!exposure.should_expose_tool);
936    }
937
938    #[test]
939    fn build_summary_prompt_includes_task_list_and_state_sections() {
940        let mut session = Session::new("summary-prompt", "gpt-4o-mini");
941        session.set_task_list(TaskList {
942            session_id: session.id.clone(),
943            title: "Task List".to_string(),
944            items: vec![
945                TaskItem {
946                    id: "task_1".to_string(),
947                    description: "检查 51% 又回落到 50% 的触发逻辑".to_string(),
948                    status: TaskItemStatus::InProgress,
949                    depends_on: Vec::new(),
950                    notes: "避免刚压缩完又立刻再次压缩".to_string(),
951                    ..TaskItem::default()
952                },
953                TaskItem {
954                    id: "task_2".to_string(),
955                    description: "重写 summarizer prompt 并纳入 task list".to_string(),
956                    status: TaskItemStatus::Pending,
957                    depends_on: Vec::new(),
958                    notes: String::new(),
959                    ..TaskItem::default()
960                },
961            ],
962            created_at: Utc::now(),
963            updated_at: Utc::now(),
964        });
965        let prompt = build_summary_prompt(
966            &session,
967            &[
968                Message::user("继续修复 context compression"),
969                Message::assistant("先分析 trigger / target / summary", None),
970            ],
971            Some("old summary"),
972        );
973
974        assert!(prompt.contains("## Current Task List"));
975        assert!(prompt.contains("Current active objective"));
976        assert!(prompt.contains("Requirement checklist"));
977        assert!(prompt.contains("Active tasks"));
978        assert!(prompt.contains("Completed tasks"));
979        assert!(prompt.contains("Obsolete or superseded tasks"));
980        assert!(prompt.contains("检查 51% 又回落到 50% 的触发逻辑"));
981        assert!(prompt.contains("old summary"));
982    }
983
984    #[test]
985    fn forced_plan_keeps_last_three_user_messages_active() {
986        let budget = TokenBudget {
987            max_context_tokens: 1200,
988            max_output_tokens: 100,
989            strategy: BudgetStrategy::Hybrid {
990                window_size: 20,
991                enable_summarization: true,
992            },
993            safety_margin: 0,
994            compression_trigger_percent: 80,
995            compression_target_percent: 20,
996            working_reserve_tokens: 0,
997            fallback_trigger_percent: 75,
998            prompt_cache_min_tool_output_chars: 1_200,
999            prompt_cache_head_chars: 280,
1000            prompt_cache_tail_chars: 180,
1001            prompt_cache_recent_user_turns: 2,
1002            prompt_cache_recent_tool_chains: 2,
1003            max_tool_output_tokens: 0,
1004        };
1005        let mut session = Session::new("keep-last-three-user-turns", "gpt-4o-mini");
1006        session.token_budget = Some(budget.clone());
1007        session.add_message(Message::system("system"));
1008        for i in 0..6 {
1009            session.add_message(Message::user(format!(
1010                "U{i}: {}",
1011                "alpha beta gamma ".repeat(8)
1012            )));
1013            session.add_message(Message::assistant(
1014                format!("A{i}: {}", "analysis plan steps ".repeat(8)),
1015                None,
1016            ));
1017        }
1018
1019        let plan = build_forced_compression_plan_with_summary(
1020            &session,
1021            "gpt-4o-mini",
1022            Some(&budget),
1023            "summary".to_string(),
1024            CompressionTriggerType::CriticalOverflow,
1025        )
1026        .expect("forced plan should build");
1027
1028        let compressed_ids = plan
1029            .compressed_message_ids
1030            .iter()
1031            .map(String::as_str)
1032            .collect::<HashSet<_>>();
1033        let kept_user_contents = session
1034            .messages
1035            .iter()
1036            .filter(|message| !matches!(message.role, bamboo_domain::Role::System))
1037            .filter(|message| !compressed_ids.contains(message.id.as_str()))
1038            .filter(|message| matches!(message.role, bamboo_domain::Role::User))
1039            .map(|message| message.content.clone())
1040            .collect::<Vec<_>>();
1041
1042        assert!(
1043            kept_user_contents.len() >= 3,
1044            "expected to keep at least 3 user messages, got {}",
1045            kept_user_contents.len()
1046        );
1047        assert!(kept_user_contents
1048            .iter()
1049            .any(|content| content.starts_with("U3:")));
1050        assert!(kept_user_contents
1051            .iter()
1052            .any(|content| content.starts_with("U4:")));
1053        assert!(kept_user_contents
1054            .iter()
1055            .any(|content| content.starts_with("U5:")));
1056    }
1057
1058    #[test]
1059    fn estimate_exposure_prefers_persisted_budget_usage_when_higher() {
1060        let mut session = Session::new("persisted-usage", "gpt-4o-mini");
1061        session.token_budget = Some(TokenBudget {
1062            max_context_tokens: 100_000,
1063            max_output_tokens: 1_000,
1064            strategy: BudgetStrategy::Hybrid {
1065                window_size: 20,
1066                enable_summarization: true,
1067            },
1068            safety_margin: 0,
1069            compression_trigger_percent: 80,
1070            compression_target_percent: 50,
1071            working_reserve_tokens: 0,
1072            fallback_trigger_percent: 75,
1073            prompt_cache_min_tool_output_chars: 1_200,
1074            prompt_cache_head_chars: 280,
1075            prompt_cache_tail_chars: 180,
1076            prompt_cache_recent_user_turns: 2,
1077            prompt_cache_recent_tool_chains: 2,
1078            max_tool_output_tokens: 0,
1079        });
1080        session.add_message(Message::system("system"));
1081        session.add_message(Message::user("short"));
1082        session.add_message(Message::assistant("short", None));
1083        session.add_message(Message::user("follow-up"));
1084        session.add_message(Message::assistant("reply", None));
1085        session.token_usage = Some(TokenBudgetUsage {
1086            system_tokens: 100,
1087            summary_tokens: 0,
1088            window_tokens: 95_900,
1089            total_tokens: 96_000,
1090            max_context_tokens: 100_000,
1091            budget_limit: 10_000,
1092            truncation_occurred: true,
1093            segments_removed: 12,
1094            prompt_cached_tool_outputs: 0,
1095            prompt_cached_tool_tokens_saved: 0,
1096            thinking_tokens: 0,
1097            cache_read_input_tokens: 0,
1098        });
1099
1100        let exposure = estimate_context_compression_exposure(
1101            &session,
1102            "gpt-4o-mini",
1103            session.token_budget.as_ref(),
1104        );
1105
1106        assert!(
1107            exposure.active_usage_percent >= 96.0,
1108            "expected persisted context-window usage to drive exposure, got {}",
1109            exposure.active_usage_percent
1110        );
1111        assert!(exposure.should_expose_tool);
1112    }
1113
1114    #[test]
1115    fn never_compress_messages_are_excluded_from_summarize_set() {
1116        let budget = TokenBudget {
1117            max_context_tokens: 1200,
1118            max_output_tokens: 100,
1119            strategy: BudgetStrategy::Hybrid {
1120                window_size: 20,
1121                enable_summarization: true,
1122            },
1123            safety_margin: 0,
1124            compression_trigger_percent: 80,
1125            compression_target_percent: 20,
1126            working_reserve_tokens: 0,
1127            fallback_trigger_percent: 75,
1128            prompt_cache_min_tool_output_chars: 1_200,
1129            prompt_cache_head_chars: 280,
1130            prompt_cache_tail_chars: 180,
1131            prompt_cache_recent_user_turns: 2,
1132            prompt_cache_recent_tool_chains: 2,
1133            max_tool_output_tokens: 0,
1134        };
1135        let mut session = Session::new("never-compress-test", "gpt-4o-mini");
1136        session.token_budget = Some(budget.clone());
1137        session.add_message(Message::system("system"));
1138
1139        // Old user message that should be summarized
1140        session.add_message(Message::user("Old question about X"));
1141        session.add_message(Message::assistant("Old answer about X", None));
1142
1143        // Protected user message (never_compress = true)
1144        let mut protected = Message::user("Critical context that must survive");
1145        protected.never_compress = true;
1146        session.add_message(protected);
1147        session.add_message(Message::assistant("Response to critical", None));
1148
1149        // Recent user messages that anchor the keep window
1150        for i in 0..4 {
1151            session.add_message(Message::user(format!(
1152                "Recent U{i}: {}",
1153                "padding text to fill budget ".repeat(6)
1154            )));
1155            session.add_message(Message::assistant(
1156                format!("Recent A{i}: {}", "reply padding text ".repeat(6)),
1157                None,
1158            ));
1159        }
1160
1161        let plan = build_forced_compression_plan_with_summary(
1162            &session,
1163            "gpt-4o-mini",
1164            Some(&budget),
1165            "summary".to_string(),
1166            CompressionTriggerType::Auto,
1167        )
1168        .expect("plan should build");
1169
1170        let compressed_ids: HashSet<&str> = plan
1171            .compressed_message_ids
1172            .iter()
1173            .map(String::as_str)
1174            .collect();
1175
1176        // Find the never_compress message
1177        let protected_msg = session
1178            .messages
1179            .iter()
1180            .find(|m| m.never_compress)
1181            .expect("should find the protected message");
1182
1183        assert!(
1184            !compressed_ids.contains(protected_msg.id.as_str()),
1185            "never_compress message should NOT be in the compressed set"
1186        );
1187    }
1188
1189    #[test]
1190    fn skill_tool_chain_messages_are_protected_from_compression() {
1191        let budget = TokenBudget {
1192            max_context_tokens: 1200,
1193            max_output_tokens: 100,
1194            strategy: BudgetStrategy::Hybrid {
1195                window_size: 20,
1196                enable_summarization: true,
1197            },
1198            safety_margin: 0,
1199            compression_trigger_percent: 80,
1200            compression_target_percent: 20,
1201            working_reserve_tokens: 0,
1202            fallback_trigger_percent: 75,
1203            prompt_cache_min_tool_output_chars: 1_200,
1204            prompt_cache_head_chars: 280,
1205            prompt_cache_tail_chars: 180,
1206            prompt_cache_recent_user_turns: 2,
1207            prompt_cache_recent_tool_chains: 2,
1208            max_tool_output_tokens: 0,
1209        };
1210        let mut session = Session::new("skill-chain-test", "gpt-4o-mini");
1211        session.token_budget = Some(budget.clone());
1212        session.add_message(Message::system("system"));
1213
1214        // Skill tool chain (load_skill + read_skill_resource)
1215        let mut skill_call = Message::assistant(String::new(), None);
1216        skill_call.tool_calls = Some(vec![ToolCall {
1217            id: "tc-skill".to_string(),
1218            tool_type: "function".to_string(),
1219            function: FunctionCall {
1220                name: "load_skill".to_string(),
1221                arguments: r#"{"skill_id":"my-skill"}"#.to_string(),
1222            },
1223        }]);
1224        session.add_message(skill_call);
1225
1226        let mut skill_result = Message::tool_result("tc-skill", "skill loaded");
1227        skill_result.tool_success = Some(true);
1228        session.add_message(skill_result);
1229
1230        // Regular messages to fill budget
1231        for i in 0..6 {
1232            session.add_message(Message::user(format!(
1233                "U{i}: {}",
1234                "alpha beta gamma delta ".repeat(8)
1235            )));
1236            session.add_message(Message::assistant(
1237                format!("A{i}: {}", "analysis steps plan ".repeat(8)),
1238                None,
1239            ));
1240        }
1241
1242        let plan = build_forced_compression_plan_with_summary(
1243            &session,
1244            "gpt-4o-mini",
1245            Some(&budget),
1246            "summary".to_string(),
1247            CompressionTriggerType::Auto,
1248        )
1249        .expect("plan should build");
1250
1251        let compressed_ids: HashSet<&str> = plan
1252            .compressed_message_ids
1253            .iter()
1254            .map(String::as_str)
1255            .collect();
1256
1257        // Skill tool chain messages should not be compressed
1258        let skill_messages: Vec<&Message> = session
1259            .messages
1260            .iter()
1261            .filter(|m| {
1262                m.tool_calls
1263                    .as_ref()
1264                    .is_some_and(|calls| calls.iter().any(|c| c.function.name == "load_skill"))
1265                    || m.tool_call_id.as_deref() == Some("tc-skill")
1266            })
1267            .collect();
1268
1269        for msg in &skill_messages {
1270            assert!(
1271                !compressed_ids.contains(msg.id.as_str()),
1272                "skill tool chain message {} should NOT be compressed",
1273                msg.id
1274            );
1275        }
1276    }
1277
1278    #[test]
1279    fn recovery_message_returns_none_for_empty_messages() {
1280        let session = Session::new("recovery-empty", "model");
1281        let result = build_post_compaction_recovery_message(&[], &session);
1282        assert!(result.is_none());
1283    }
1284
1285    #[test]
1286    fn recovery_message_has_never_compress_flag() {
1287        let mut session = Session::new("recovery-flag", "model");
1288        let messages = vec![Message::assistant("no decisions here", None)];
1289        session.set_task_list(TaskList {
1290            session_id: session.id.clone(),
1291            title: "Tasks".to_string(),
1292            items: vec![TaskItem {
1293                id: "t1".to_string(),
1294                description: "Active task".to_string(),
1295                status: TaskItemStatus::InProgress,
1296                ..TaskItem::default()
1297            }],
1298            created_at: Utc::now(),
1299            updated_at: Utc::now(),
1300        });
1301        let recovery = build_post_compaction_recovery_message(&messages, &session)
1302            .expect("should return recovery message");
1303        assert!(recovery.never_compress);
1304        assert!(recovery.content.contains("[post-compaction-recovery]"));
1305    }
1306
1307    #[test]
1308    fn recovery_message_extracts_file_paths_from_tool_calls() {
1309        let session = Session::new("recovery-files", "model");
1310        let mut write_call = Message::assistant("writing file", None);
1311        write_call.tool_calls = Some(vec![ToolCall {
1312            id: "tc1".to_string(),
1313            tool_type: "function".to_string(),
1314            function: FunctionCall {
1315                name: "Write".to_string(),
1316                arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#.to_string(),
1317            },
1318        }]);
1319        let mut edit_call = Message::assistant("editing file", None);
1320        edit_call.tool_calls = Some(vec![ToolCall {
1321            id: "tc2".to_string(),
1322            tool_type: "function".to_string(),
1323            function: FunctionCall {
1324                name: "Edit".to_string(),
1325                arguments: r#"{"file_path":"/lib/utils.rs","old":"x","new":"y"}"#.to_string(),
1326            },
1327        }]);
1328        let messages = vec![write_call, edit_call];
1329
1330        let recovery = build_post_compaction_recovery_message(&messages, &session)
1331            .expect("should return recovery");
1332        assert!(recovery.content.contains("/src/main.rs"));
1333        assert!(recovery.content.contains("/lib/utils.rs"));
1334        assert!(recovery.content.contains("Recently Modified Files"));
1335    }
1336
1337    #[test]
1338    fn recovery_message_includes_active_tasks() {
1339        let mut session = Session::new("recovery-tasks", "model");
1340        session.set_task_list(TaskList {
1341            session_id: session.id.clone(),
1342            title: "Tasks".to_string(),
1343            items: vec![
1344                TaskItem {
1345                    id: "t1".to_string(),
1346                    description: "Fix auth middleware".to_string(),
1347                    status: TaskItemStatus::InProgress,
1348                    ..TaskItem::default()
1349                },
1350                TaskItem {
1351                    id: "t2".to_string(),
1352                    description: "Add tests".to_string(),
1353                    status: TaskItemStatus::Pending,
1354                    ..TaskItem::default()
1355                },
1356                TaskItem {
1357                    id: "t3".to_string(),
1358                    description: "Done task".to_string(),
1359                    status: TaskItemStatus::Completed,
1360                    ..TaskItem::default()
1361                },
1362            ],
1363            created_at: Utc::now(),
1364            updated_at: Utc::now(),
1365        });
1366        let messages = vec![Message::assistant("some work", None)];
1367
1368        let recovery = build_post_compaction_recovery_message(&messages, &session)
1369            .expect("should return recovery");
1370        assert!(recovery.content.contains("Active Tasks"));
1371        assert!(recovery.content.contains("Fix auth middleware"));
1372        assert!(recovery.content.contains("Add tests"));
1373        // Completed tasks should NOT appear in active tasks
1374        assert!(!recovery.content.contains("Done task"));
1375    }
1376
1377    #[test]
1378    fn apply_compression_plan_injects_recovery_message() {
1379        let budget = TokenBudget {
1380            max_context_tokens: 1200,
1381            max_output_tokens: 100,
1382            strategy: BudgetStrategy::Hybrid {
1383                window_size: 20,
1384                enable_summarization: true,
1385            },
1386            safety_margin: 0,
1387            compression_trigger_percent: 80,
1388            compression_target_percent: 20,
1389            working_reserve_tokens: 0,
1390            fallback_trigger_percent: 75,
1391            prompt_cache_min_tool_output_chars: 1_200,
1392            prompt_cache_head_chars: 280,
1393            prompt_cache_tail_chars: 180,
1394            prompt_cache_recent_user_turns: 2,
1395            prompt_cache_recent_tool_chains: 2,
1396            max_tool_output_tokens: 0,
1397        };
1398        let mut session = Session::new("recovery-inject", "gpt-4o-mini");
1399        session.token_budget = Some(budget.clone());
1400        session.add_message(Message::system("system"));
1401
1402        // Old messages with tool calls containing file paths
1403        let mut write_msg = Message::assistant("writing", None);
1404        write_msg.tool_calls = Some(vec![ToolCall {
1405            id: "tc-w".to_string(),
1406            tool_type: "function".to_string(),
1407            function: FunctionCall {
1408                name: "Write".to_string(),
1409                arguments: r#"{"file_path":"/src/lib.rs","content":"pub fn hello() {}"}"#
1410                    .to_string(),
1411            },
1412        }]);
1413        session.add_message(Message::user("Write the file"));
1414        session.add_message(write_msg);
1415
1416        // Fill with enough messages to force compression
1417        for i in 0..6 {
1418            session.add_message(Message::user(format!(
1419                "U{i}: {}",
1420                "alpha beta gamma delta ".repeat(8)
1421            )));
1422            session.add_message(Message::assistant(
1423                format!("A{i}: {}", "analysis plan ".repeat(8)),
1424                None,
1425            ));
1426        }
1427
1428        let plan = build_forced_compression_plan_with_summary(
1429            &session,
1430            "gpt-4o-mini",
1431            Some(&budget),
1432            "summary text".to_string(),
1433            CompressionTriggerType::Auto,
1434        )
1435        .expect("plan should build");
1436
1437        assert!(!plan.compressed_message_ids.is_empty());
1438
1439        let compressed_count = apply_compression_plan(&mut session, plan);
1440        assert!(compressed_count > 0);
1441
1442        // Verify recovery message was injected
1443        let has_recovery = session.messages.iter().any(|m| {
1444            m.never_compress
1445                && m.content.contains("[post-compaction-recovery]")
1446                && m.content.contains("/src/lib.rs")
1447        });
1448        assert!(
1449            has_recovery,
1450            "session should contain a post-compaction recovery message with the file path"
1451        );
1452    }
1453
1454    #[test]
1455    fn summary_quality_full_coverage_when_all_files_mentioned() {
1456        let messages = vec![{
1457            let mut m = Message::assistant("writing", None);
1458            m.tool_calls = Some(vec![ToolCall {
1459                id: "tc1".to_string(),
1460                tool_type: "function".to_string(),
1461                function: FunctionCall {
1462                    name: "Write".to_string(),
1463                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1464                        .to_string(),
1465                },
1466            }]);
1467            m
1468        }];
1469        let summary = "Modified /src/main.rs to add main function";
1470        let quality = validate_summary_quality(summary, &messages);
1471        assert!(
1472            quality.file_coverage >= 0.99,
1473            "file_coverage should be ~1.0, got {:.2}",
1474            quality.file_coverage
1475        );
1476    }
1477
1478    #[test]
1479    fn summary_quality_zero_coverage_when_no_files_mentioned() {
1480        let messages = vec![{
1481            let mut m = Message::assistant("writing", None);
1482            m.tool_calls = Some(vec![ToolCall {
1483                id: "tc1".to_string(),
1484                tool_type: "function".to_string(),
1485                function: FunctionCall {
1486                    name: "Write".to_string(),
1487                    arguments: r#"{"file_path":"/src/main.rs","content":"fn main() {}"}"#
1488                        .to_string(),
1489                },
1490            }]);
1491            m
1492        }];
1493        let summary = "Summary that mentions nothing about files";
1494        let quality = validate_summary_quality(summary, &messages);
1495        assert!(
1496            quality.file_coverage < 0.01,
1497            "file_coverage should be ~0.0, got {:.2}",
1498            quality.file_coverage
1499        );
1500    }
1501
1502    #[test]
1503    fn summary_quality_handles_empty_messages() {
1504        let quality = validate_summary_quality("some summary", &[]);
1505        assert_eq!(quality.file_coverage, 1.0);
1506        assert_eq!(quality.decision_coverage, 1.0);
1507    }
1508}