Skip to main content

bamboo_compression/
summarizer.rs

1//! Conversation summarization for rolling context management.
2//!
3//! When conversations are truncated due to token limits, a summary preserves
4//! key information from earlier context.
5
6use async_trait::async_trait;
7use bamboo_agent_core::{Message, Role};
8use bamboo_domain::ReasoningEffort;
9use bamboo_infrastructure::LLMChunk;
10use bamboo_infrastructure::{LLMProvider, LLMRequestOptions};
11use futures::StreamExt;
12use std::collections::HashSet;
13use std::sync::Arc;
14
15/// Trait for summarization implementations.
16#[async_trait]
17pub trait Summarizer: Send + Sync {
18    /// Generate a summary of the given messages.
19    ///
20    /// Returns a string containing the summary.
21    async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError>;
22
23    /// Get the estimated token count of the summary.
24    ///
25    /// Used to ensure the summary fits within the budget.
26    fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
27        // Rough estimate: each message contributes ~50 tokens to the summary
28        (message_count * 50).min(1000) as u32
29    }
30}
31
32/// Heuristic summarizer that extracts key points without using an LLM.
33///
34/// This is a lightweight summarization approach that:
35/// 1. Lists user questions/requests
36/// 2. Lists tools that were used
37/// 3. Captures final conclusions
38///
39/// This provides continuity without expensive LLM calls.
40#[derive(Debug, Default)]
41pub struct HeuristicSummarizer;
42
43impl HeuristicSummarizer {
44    /// Create a new heuristic summarizer.
45    pub fn new() -> Self {
46        Self
47    }
48
49    /// Extract user questions from messages.
50    fn extract_user_questions<'a>(&self, messages: &'a [Message]) -> Vec<&'a str> {
51        messages
52            .iter()
53            .filter(|m| m.role == Role::User)
54            .filter(|m| !m.content.is_empty())
55            .take(10) // Limit to prevent huge summaries
56            .map(|m| m.content.as_str())
57            .collect()
58    }
59
60    /// Extract tool calls that were made.
61    fn extract_tools_used(&self, messages: &[Message]) -> Vec<String> {
62        let mut tools = HashSet::new();
63
64        for message in messages {
65            if let Some(ref tool_calls) = message.tool_calls {
66                for call in tool_calls {
67                    tools.insert(call.function.name.clone());
68                }
69            }
70        }
71
72        let mut result: Vec<String> = tools.into_iter().collect();
73        result.sort();
74        result
75    }
76
77    /// Extract key assistant responses.
78    fn extract_key_responses<'a>(&self, messages: &'a [Message]) -> Vec<&'a str> {
79        messages
80            .iter()
81            .filter(|m| m.role == Role::Assistant)
82            .filter(|m| !m.content.is_empty())
83            .rev() // Take most recent first
84            .take(3)
85            .map(|m| m.content.as_str())
86            .collect()
87    }
88
89    /// Safely truncate a string at a character boundary.
90    /// Uses char_indices() to ensure we don't split UTF-8 multi-byte characters.
91    fn safe_truncate(&self, s: &str, max_chars: usize) -> String {
92        if s.chars().count() <= max_chars {
93            return s.to_string();
94        }
95
96        // Take up to max_chars characters safely
97        let truncated: String = s.chars().take(max_chars).collect();
98        format!("{}...", truncated)
99    }
100}
101
102#[async_trait]
103impl Summarizer for HeuristicSummarizer {
104    async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError> {
105        if messages.is_empty() {
106            return Ok("No conversation history.".to_string());
107        }
108
109        let questions = self.extract_user_questions(messages);
110        let tools = self.extract_tools_used(messages);
111        let responses = self.extract_key_responses(messages);
112
113        let mut summary_parts = Vec::new();
114
115        // User requests section
116        if !questions.is_empty() {
117            summary_parts.push("## User Requests".to_string());
118            for (i, q) in questions.iter().enumerate() {
119                // Truncate long questions for the summary (safe UTF-8)
120                let truncated = self.safe_truncate(q, 200);
121                summary_parts.push(format!("{}. {}", i + 1, truncated));
122            }
123        }
124
125        // Tools used section
126        if !tools.is_empty() {
127            summary_parts.push("\n## Tools Used".to_string());
128            for tool in tools {
129                summary_parts.push(format!("- {}", tool));
130            }
131        }
132
133        // Key responses section
134        if !responses.is_empty() {
135            summary_parts.push("\n## Key Outcomes".to_string());
136            for (i, r) in responses.iter().enumerate() {
137                // Truncate long responses (safe UTF-8)
138                let truncated = self.safe_truncate(r, 300);
139                summary_parts.push(format!("{}. {}", i + 1, truncated));
140            }
141        }
142
143        if summary_parts.is_empty() {
144            Ok("Previous conversation context available.".to_string())
145        } else {
146            Ok(summary_parts.join("\n"))
147        }
148    }
149}
150
/// Trigger conditions for when to create a summary.
///
/// Triggers can be compared with `==`, which is convenient in tests and
/// configuration checks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SummaryTrigger {
    /// Always summarize when truncation occurs
    OnTruncation,
    /// Summarize after N rounds of conversation
    Periodic {
        /// The N above; `SummaryManager::should_summarize` compares it
        /// against the number of messages it is given.
        interval: usize,
    },
    /// Summarize when token count exceeds threshold
    TokenThreshold {
        /// Token count at which `SummaryManager::should_summarize` starts
        /// returning `true` (the comparison is inclusive).
        threshold: u32,
    },
}
161
162/// Manager for conversation summarization.
163pub struct SummaryManager {
164    summarizer: Box<dyn Summarizer>,
165    trigger: SummaryTrigger,
166}
167
168impl std::fmt::Debug for SummaryManager {
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        f.debug_struct("SummaryManager")
171            .field("trigger", &self.trigger)
172            .finish_non_exhaustive()
173    }
174}
175
176impl SummaryManager {
177    /// Create a new summary manager.
178    pub fn new(summarizer: impl Summarizer + 'static, trigger: SummaryTrigger) -> Self {
179        Self {
180            summarizer: Box::new(summarizer),
181            trigger,
182        }
183    }
184
185    /// Check if summarization should be triggered based on conversation state.
186    pub fn should_summarize(
187        &self,
188        messages: &[Message],
189        _truncation_occurred: bool,
190        current_token_count: u32,
191    ) -> bool {
192        match &self.trigger {
193            SummaryTrigger::OnTruncation => _truncation_occurred,
194            SummaryTrigger::Periodic { interval } => messages.len() >= *interval,
195            SummaryTrigger::TokenThreshold { threshold } => current_token_count >= *threshold,
196        }
197    }
198
199    /// Generate a summary of the messages.
200    pub async fn summarize(
201        &self,
202        messages: &[Message],
203    ) -> Result<String, crate::types::BudgetError> {
204        self.summarizer.summarize(messages).await
205    }
206
207    /// Estimate the token count of a summary for N messages.
208    pub fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
209        self.summarizer.estimate_summary_tokens(message_count)
210    }
211}
212
/// Mode controlling how the LLM summarizer handles existing summaries.
///
/// The mode selects which system prompt the summarizer sends to the model.
/// Modes can be compared with `==`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum SummaryMode {
    /// Generate a complete summary from scratch (default).
    #[default]
    FullRewrite,
    /// Update an existing summary by incorporating new information incrementally.
    IncrementalMerge,
}
222
223/// LLM-based summarizer that calls the current session's model to generate
224/// a rich summary of compressed/removed messages.
225///
226/// Falls back to [`HeuristicSummarizer`] if the LLM call fails.
227pub struct LlmSummarizer {
228    llm: Arc<dyn LLMProvider>,
229    model: String,
230    /// Optional existing summary to build upon (incremental summarization).
231    existing_summary: Option<String>,
232    /// Optional current task list prompt so summary generation can distinguish
233    /// active vs completed/obsolete work using the session's source of truth.
234    task_list_prompt: Option<String>,
235    /// Optional user-provided instructions that override/extend the default summary focus.
236    custom_instructions: Option<String>,
237    /// Controls how the summarizer handles existing summaries.
238    summary_mode: SummaryMode,
239}
240
241impl LlmSummarizer {
242    pub fn new(
243        llm: Arc<dyn LLMProvider>,
244        model: String,
245        existing_summary: Option<String>,
246        task_list_prompt: Option<String>,
247    ) -> Self {
248        Self {
249            llm,
250            model,
251            existing_summary,
252            task_list_prompt,
253            custom_instructions: None,
254            summary_mode: SummaryMode::default(),
255        }
256    }
257
258    pub fn with_custom_instructions(mut self, instructions: Option<String>) -> Self {
259        self.custom_instructions = instructions;
260        self
261    }
262
263    pub fn with_summary_mode(mut self, mode: SummaryMode) -> Self {
264        self.summary_mode = mode;
265        self
266    }
267
268    /// Build the summarization prompt for the LLM.
269    fn build_summarization_messages(&self, messages: &[Message]) -> Vec<Message> {
270        let mut prompt_messages = Vec::new();
271
272        let system_prompt = match self.summary_mode {
273            SummaryMode::FullRewrite => {
274                r#"You are a conversation summarizer. Your task is to create a concise but reliable working-memory summary for a conversation that was removed due to context window limits.
275
276Guidelines:
277- First capture the in-flight work right before compression (what was being done, where, and with which tool/file)
278- Distinguish clearly between CURRENT ACTIVE work, COMPLETED work, and OBSOLETE or superseded work
279- Do not restate old tasks as active unless they are still unresolved
280- The provided current task list is the source of truth for active work
281- Preserve key decisions, constraints, file paths, code changes, tool findings, blockers, and important outcomes
282- Preserve error messages, test results (pass/fail counts), and function/variable names that are relevant to active work
283- If earlier plans conflict with newer messages or the current task list, mark them as obsolete or completed
284- Explicitly evaluate each clear user requirement (e.g. requirement 1, requirement 2) with a status and evidence
285- Keep the next step specific and aligned with the active work only
286- Use structured sections
287- Write in the same language as the original conversation"#
288            }
289            SummaryMode::IncrementalMerge => {
290                r#"You are updating an existing conversation summary with new information from recent messages.
291
292Guidelines:
293- Incorporate new information into the existing summary structure
294- Mark previously active work as completed if the new messages confirm completion
295- Remove or condense information that is no longer relevant
296- Preserve all key decisions, file paths, and constraints that remain active
297- If new messages conflict with the existing summary, the new messages take precedence
298- Keep the summary focused on what is currently active and relevant
299- The provided current task list is the source of truth for active work
300- Maintain the same structured sections as the existing summary
301- Write in the same language as the original conversation
302- Be concise: avoid repeating information already well-captured in the existing summary"#
303            }
304        };
305
306        prompt_messages.push(Message::system(system_prompt));
307
308        let mut user_content = String::new();
309
310        if let Some(ref existing) = self.existing_summary {
311            user_content.push_str("## Previous Summary\n\n");
312            user_content.push_str(existing);
313            user_content.push_str("\n\n---\n\n");
314        }
315
316        if let Some(task_list_prompt) = self
317            .task_list_prompt
318            .as_deref()
319            .map(str::trim)
320            .filter(|value| !value.is_empty())
321        {
322            user_content.push_str("## Current Task List\n\n");
323            user_content.push_str(task_list_prompt);
324            user_content.push_str("\n\n---\n\n");
325        }
326
327        if let Some(ref instructions) = self.custom_instructions {
328            if !instructions.trim().is_empty() {
329                user_content.push_str("## Custom Compression Instructions\n\n");
330                user_content.push_str(instructions.trim());
331                user_content.push_str("\n\n---\n\n");
332            }
333        }
334
335        user_content.push_str(
336            "## Required Output Sections\n1. Pre-compression in-flight work (what was being done immediately before compression)\n2. Current active objective\n3. Requirement checklist (Requirement | Status: completed/in_progress/pending/blocked/obsolete | Evidence)\n4. Active tasks\n5. Completed tasks\n6. Obsolete or superseded tasks\n7. Important context and constraints\n8. Files, code, and tool findings\n9. Open issues and next step\n\n",
337        );
338
339        user_content.push_str("## Messages to Summarize\n\n");
340
341        for message in messages {
342            let role_label = match message.role {
343                Role::User => "User",
344                Role::Assistant => "Assistant",
345                Role::Tool => "Tool Result",
346                Role::System => continue,
347            };
348
349            if let Some(ref tool_calls) = message.tool_calls {
350                if !tool_calls.is_empty() {
351                    let tool_names: Vec<&str> = tool_calls
352                        .iter()
353                        .map(|tc| tc.function.name.as_str())
354                        .collect();
355                    user_content.push_str(&format!(
356                        "**{}** [called tools: {}]:\n",
357                        role_label,
358                        tool_names.join(", ")
359                    ));
360                } else {
361                    user_content.push_str(&format!("**{}**:\n", role_label));
362                }
363            } else {
364                user_content.push_str(&format!("**{}**:\n", role_label));
365            }
366
367            if let Some(ref tool_call_id) = message.tool_call_id {
368                user_content.push_str(&format!("(tool_call_id: {})\n", tool_call_id));
369            }
370
371            let content = &message.content;
372            const MAX_CONTENT_CHARS: usize = 2000;
373            if content.chars().count() > MAX_CONTENT_CHARS {
374                let truncated: String = content.chars().take(MAX_CONTENT_CHARS).collect();
375                user_content.push_str(&truncated);
376                user_content.push_str("... [truncated]\n\n");
377            } else {
378                user_content.push_str(content);
379                user_content.push_str("\n\n");
380            }
381        }
382
383        user_content.push_str(
384            "\n---\n\nReturn only the summary text. Be explicit about what is active now versus what is already completed or no longer relevant.",
385        );
386
387        prompt_messages.push(Message::user(user_content));
388
389        prompt_messages
390    }
391
392    /// Consume an LLM stream and collect the full text response.
393    async fn collect_stream_response(
394        &self,
395        messages: &[Message],
396    ) -> Result<String, crate::types::BudgetError> {
397        // Summarization is a lightweight auxiliary request; cap reasoning effort at `high`
398        // to stay compatible with fast models (e.g. gpt-5-mini).
399        let options = LLMRequestOptions {
400            session_id: None,
401            reasoning_effort: Some(ReasoningEffort::High),
402            parallel_tool_calls: None,
403            responses: None,
404            request_purpose: Some("compression".to_string()),
405        };
406        let stream = self
407            .llm
408            .chat_stream_with_options(messages, &[], Some(8192), &self.model, Some(&options))
409            .await
410            .map_err(|e| {
411                crate::types::BudgetError::TokenCountError(format!(
412                    "LLM summarization call failed: {}",
413                    e
414                ))
415            })?;
416
417        let mut content = String::new();
418        let mut stream = stream;
419
420        while let Some(chunk_result) = stream.next().await {
421            match chunk_result {
422                Ok(LLMChunk::Token(text)) => content.push_str(&text),
423                Ok(LLMChunk::Done) => break,
424                Ok(_) => {} // Ignore reasoning tokens, tool calls, etc.
425                Err(e) => {
426                    tracing::warn!("LLM summarization stream error: {}", e);
427                    if !content.is_empty() {
428                        break;
429                    }
430                    return Err(crate::types::BudgetError::TokenCountError(format!(
431                        "LLM summarization stream failed: {}",
432                        e
433                    )));
434                }
435            }
436        }
437
438        Ok(content)
439    }
440}
441
442impl std::fmt::Debug for LlmSummarizer {
443    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444        f.debug_struct("LlmSummarizer")
445            .field("model", &self.model)
446            .field("has_existing_summary", &self.existing_summary.is_some())
447            .finish()
448    }
449}
450
451#[async_trait]
452impl Summarizer for LlmSummarizer {
453    async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError> {
454        if messages.is_empty() {
455            return Ok("No conversation history to summarize.".to_string());
456        }
457
458        let prompt_messages = self.build_summarization_messages(messages);
459
460        tracing::info!(
461            "LlmSummarizer: summarizing {} messages using model '{}' (existing_summary={})",
462            messages.len(),
463            self.model,
464            self.existing_summary.is_some()
465        );
466
467        match self.collect_stream_response(&prompt_messages).await {
468            Ok(summary) if !summary.trim().is_empty() => {
469                tracing::info!("LlmSummarizer: generated summary ({} chars)", summary.len());
470                Ok(summary)
471            }
472            Ok(_) => {
473                tracing::warn!(
474                    "LlmSummarizer: LLM returned empty summary, falling back to heuristic"
475                );
476                HeuristicSummarizer::new().summarize(messages).await
477            }
478            Err(e) => {
479                tracing::warn!(
480                    "LlmSummarizer: LLM call failed ({}), falling back to heuristic",
481                    e
482                );
483                HeuristicSummarizer::new().summarize(messages).await
484            }
485        }
486    }
487
488    fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
489        // LLM summaries tend to be more detailed; estimate higher than heuristic
490        (message_count * 80).min(2000) as u32
491    }
492}
493
494#[cfg(test)]
495mod tests {
496    use super::*;
497    use async_trait::async_trait;
498    use bamboo_domain::ReasoningEffort;
499    use bamboo_infrastructure::{LLMChunk, LLMError, LLMRequestOptions, LLMStream};
500    use futures::stream;
501    use std::sync::Mutex;
502
503    struct DummyProvider;
504
505    #[async_trait]
506    impl LLMProvider for DummyProvider {
507        async fn chat_stream(
508            &self,
509            _messages: &[Message],
510            _tools: &[bamboo_agent_core::ToolSchema],
511            _max_output_tokens: Option<u32>,
512            _model: &str,
513        ) -> Result<LLMStream, LLMError> {
514            Ok(Box::pin(stream::iter(vec![
515                Ok::<LLMChunk, LLMError>(LLMChunk::Token("dummy summary".to_string())),
516                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
517            ])))
518        }
519    }
520
521    #[test]
522    fn heuristic_summarizer_extracts_user_questions() {
523        let summarizer = HeuristicSummarizer::new();
524        let messages = vec![
525            Message::user("What is the weather?"),
526            Message::assistant("It's sunny.", None),
527            Message::user("What about tomorrow?"),
528        ];
529
530        let questions = summarizer.extract_user_questions(&messages);
531        assert_eq!(questions.len(), 2);
532        assert!(questions[0].contains("weather"));
533    }
534
535    #[test]
536    fn heuristic_summarizer_extracts_tools_used() {
537        use bamboo_agent_core::{FunctionCall, ToolCall};
538
539        let summarizer = HeuristicSummarizer::new();
540        let tool_call = ToolCall {
541            id: "call_1".to_string(),
542            tool_type: "function".to_string(),
543            function: FunctionCall {
544                name: "search".to_string(),
545                arguments: "{}".to_string(),
546            },
547        };
548
549        let messages = vec![
550            Message::user("Search for something"),
551            Message::assistant("I'll search", Some(vec![tool_call])),
552        ];
553
554        let tools = summarizer.extract_tools_used(&messages);
555        assert_eq!(tools, vec!["search"]);
556    }
557
558    #[test]
559    fn heuristic_summarizer_extracts_key_responses() {
560        let summarizer = HeuristicSummarizer::new();
561        let messages = vec![
562            Message::user("Hello"),
563            Message::assistant("First response", None),
564            Message::user("How are you?"),
565            Message::assistant("Most recent response", None),
566        ];
567
568        let responses = summarizer.extract_key_responses(&messages);
569        // Should return most recent first
570        assert_eq!(responses[0], "Most recent response");
571    }
572
573    #[tokio::test]
574    async fn heuristic_summarizer_generates_summary() {
575        let summarizer = HeuristicSummarizer::new();
576        let messages = vec![
577            Message::user("What is Rust?"),
578            Message::assistant("Rust is a systems programming language.", None),
579        ];
580
581        let summary = summarizer.summarize(&messages).await.unwrap();
582        assert!(summary.contains("User Requests"));
583        assert!(summary.contains("What is Rust?"));
584    }
585
586    #[test]
587    fn summary_trigger_on_truncation() {
588        let trigger = SummaryTrigger::OnTruncation;
589
590        assert!(matches!(trigger, SummaryTrigger::OnTruncation));
591        // When truncation_occurred is true
592        assert!(matches!(trigger, SummaryTrigger::OnTruncation));
593        // When truncation_occurred is false - just verify the trigger type
594    }
595
596    #[test]
597    fn summary_trigger_periodic() {
598        let trigger = SummaryTrigger::Periodic { interval: 5 };
599        let messages: Vec<Message> = (0..5).map(|_| Message::user("Test")).collect();
600
601        // Verify the trigger is periodic with correct interval
602        if let SummaryTrigger::Periodic { interval } = trigger {
603            assert_eq!(interval, 5);
604            assert!(messages.len() >= interval);
605        } else {
606            panic!("Expected Periodic trigger");
607        }
608    }
609
610    #[test]
611    fn summary_trigger_token_threshold() {
612        let trigger = SummaryTrigger::TokenThreshold { threshold: 1000 };
613
614        // Verify the trigger has the correct threshold
615        if let SummaryTrigger::TokenThreshold { threshold } = trigger {
616            assert_eq!(threshold, 1000);
617        } else {
618            panic!("Expected TokenThreshold trigger");
619        }
620    }
621
622    #[test]
623    fn safe_truncate_handles_ascii() {
624        let summarizer = HeuristicSummarizer::new();
625        let text = "Hello world this is a test";
626        let truncated = summarizer.safe_truncate(text, 10);
627
628        assert!(truncated.ends_with("..."));
629        // Should have at most 10 characters + "..."
630        assert!(truncated.chars().count() <= 13);
631    }
632
633    #[test]
634    fn safe_truncate_handles_unicode() {
635        let summarizer = HeuristicSummarizer::new();
636
637        // Test with emoji (multi-byte UTF-8)
638        let text = "Hello 😀🎉🚀 World with emoji";
639        let truncated = summarizer.safe_truncate(text, 10);
640
641        // Should not panic and should end with "..."
642        assert!(truncated.ends_with("..."));
643        assert!(truncated.chars().count() <= 13);
644    }
645
646    #[test]
647    fn safe_truncate_handles_cjk() {
648        let summarizer = HeuristicSummarizer::new();
649
650        // Test with Chinese/Japanese/Korean characters (3-byte UTF-8)
651        let text = "这是一个中文测试消息用于验证截断";
652        let truncated = summarizer.safe_truncate(text, 10);
653
654        // Should not panic
655        assert!(truncated.ends_with("..."));
656        assert!(truncated.chars().count() <= 13);
657    }
658
659    #[test]
660    fn safe_truncate_handles_mixed_unicode() {
661        let summarizer = HeuristicSummarizer::new();
662
663        // Mixed ASCII, CJK, and emoji
664        let text = "Hello 世界 🌍 test message";
665        let truncated = summarizer.safe_truncate(text, 8);
666
667        // Should not panic
668        assert!(truncated.ends_with("..."));
669        assert!(truncated.chars().count() <= 11);
670    }
671
672    #[tokio::test]
673    async fn summarizer_handles_unicode_messages() {
674        let summarizer = HeuristicSummarizer::new();
675
676        // Create messages with unicode that needs truncation
677        let long_unicode =
678            "这是一段很长的中文消息需要被截断以测试我们的安全截断功能 😀🎉🚀".repeat(10);
679        let messages = vec![
680            Message::user(&long_unicode),
681            Message::assistant("Response", None),
682        ];
683
684        // Should not panic on unicode truncation
685        let summary = summarizer.summarize(&messages).await.unwrap();
686        assert!(summary.contains("User Requests"));
687    }
688
689    #[test]
690    fn safe_truncate_returns_short_text_unchanged() {
691        let summarizer = HeuristicSummarizer::new();
692        let text = "Short";
693        let truncated = summarizer.safe_truncate(text, 100);
694
695        // Should return unchanged
696        assert_eq!(truncated, text);
697    }
698
699    #[test]
700    fn llm_summarizer_prompt_includes_task_list_and_state_sections() {
701        let summarizer = LlmSummarizer::new(
702            Arc::new(DummyProvider),
703            "gpt-4o-mini".to_string(),
704            Some("Earlier summary".to_string()),
705            Some(
706                "## Current Task List\n[/] task_1: Fix compression bounce\n[x] task_0: Analyze bug"
707                    .to_string(),
708            ),
709        );
710        let messages = vec![
711            Message::user("继续做压缩修复"),
712            Message::assistant("我先检查 trigger 与 target", None),
713        ];
714
715        let prompt_messages = summarizer.build_summarization_messages(&messages);
716        assert_eq!(prompt_messages.len(), 2);
717        assert_eq!(prompt_messages[0].role, Role::System);
718        assert!(prompt_messages[1].content.contains("## Current Task List"));
719        assert!(prompt_messages[1]
720            .content
721            .contains("Current active objective"));
722        assert!(prompt_messages[1].content.contains("Requirement checklist"));
723        assert!(prompt_messages[1].content.contains("Active tasks"));
724        assert!(prompt_messages[1].content.contains("Completed tasks"));
725        assert!(prompt_messages[1]
726            .content
727            .contains("Obsolete or superseded tasks"));
728        assert!(prompt_messages[1].content.contains("Earlier summary"));
729    }
730
731    #[derive(Default)]
732    struct ReasoningCaptureProvider {
733        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
734    }
735
736    #[async_trait]
737    impl LLMProvider for ReasoningCaptureProvider {
738        async fn chat_stream(
739            &self,
740            _messages: &[Message],
741            _tools: &[bamboo_agent_core::ToolSchema],
742            _max_output_tokens: Option<u32>,
743            _model: &str,
744        ) -> Result<LLMStream, LLMError> {
745            Ok(Box::pin(stream::iter(vec![
746                Ok::<LLMChunk, LLMError>(LLMChunk::Token("captured summary".to_string())),
747                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
748            ])))
749        }
750
751        async fn chat_stream_with_options(
752            &self,
753            messages: &[Message],
754            tools: &[bamboo_agent_core::ToolSchema],
755            max_output_tokens: Option<u32>,
756            model: &str,
757            options: Option<&LLMRequestOptions>,
758        ) -> Result<LLMStream, LLMError> {
759            self.captured_reasoning
760                .lock()
761                .expect("captured reasoning lock should not be poisoned")
762                .push(options.and_then(|o| o.reasoning_effort));
763            self.chat_stream(messages, tools, max_output_tokens, model)
764                .await
765        }
766    }
767
768    #[tokio::test]
769    async fn llm_summarizer_requests_high_reasoning_effort_for_summary_calls() {
770        let provider = Arc::new(ReasoningCaptureProvider::default());
771        let summarizer = LlmSummarizer::new(
772            provider.clone(),
773            "gpt-5-mini".to_string(),
774            None,
775            Some("task list".to_string()),
776        );
777        let messages = vec![
778            Message::user("请总结最近三轮"),
779            Message::assistant("已完成第一步并准备第二步", None),
780        ];
781
782        let summary = summarizer
783            .summarize(&messages)
784            .await
785            .expect("summary generation should succeed");
786        assert_eq!(summary, "captured summary");
787
788        let captured = provider
789            .captured_reasoning
790            .lock()
791            .expect("captured reasoning lock should not be poisoned");
792        assert_eq!(captured.as_slice(), [Some(ReasoningEffort::High)]);
793    }
794
795    /// Provider that captures both `reasoning_effort` and `max_output_tokens`.
796    #[derive(Default)]
797    struct RequestOptionsCaptureProvider {
798        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
799        captured_max_tokens: Mutex<Vec<Option<u32>>>,
800    }
801
802    #[async_trait]
803    impl LLMProvider for RequestOptionsCaptureProvider {
804        async fn chat_stream(
805            &self,
806            _messages: &[Message],
807            _tools: &[bamboo_agent_core::ToolSchema],
808            _max_output_tokens: Option<u32>,
809            _model: &str,
810        ) -> Result<LLMStream, LLMError> {
811            Ok(Box::pin(stream::iter(vec![
812                Ok::<LLMChunk, LLMError>(LLMChunk::Token("captured summary".to_string())),
813                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
814            ])))
815        }
816
817        async fn chat_stream_with_options(
818            &self,
819            messages: &[Message],
820            tools: &[bamboo_agent_core::ToolSchema],
821            max_output_tokens: Option<u32>,
822            model: &str,
823            options: Option<&LLMRequestOptions>,
824        ) -> Result<LLMStream, LLMError> {
825            self.captured_reasoning
826                .lock()
827                .expect("lock should not be poisoned")
828                .push(options.and_then(|o| o.reasoning_effort));
829            self.captured_max_tokens
830                .lock()
831                .expect("lock should not be poisoned")
832                .push(max_output_tokens);
833            self.chat_stream(messages, tools, max_output_tokens, model)
834                .await
835        }
836    }
837
838    #[tokio::test]
839    async fn llm_summarizer_sufficient_max_tokens_for_high_reasoning() {
840        let provider = Arc::new(RequestOptionsCaptureProvider::default());
841        let summarizer = LlmSummarizer::new(
842            provider.clone(),
843            "gpt-5-mini".to_string(),
844            None,
845            Some("task list".to_string()),
846        );
847        let messages = vec![
848            Message::user("请总结最近三轮"),
849            Message::assistant("已完成第一步并准备第二步", None),
850        ];
851
852        let summary = summarizer
853            .summarize(&messages)
854            .await
855            .expect("summary generation should succeed");
856        assert_eq!(summary, "captured summary");
857
858        let captured_reasoning = provider
859            .captured_reasoning
860            .lock()
861            .expect("lock should not be poisoned");
862        let captured_max_tokens = provider
863            .captured_max_tokens
864            .lock()
865            .expect("lock should not be poisoned");
866        assert_eq!(captured_reasoning.as_slice(), [Some(ReasoningEffort::High)]);
867        let max_tokens = captured_max_tokens[0].expect("max_output_tokens should be set");
868        // ReasoningEffort::High targets 4096 thinking budget; max_tokens must leave room for output.
869        assert!(
870            max_tokens > 4096,
871            "max_output_tokens ({}) must exceed thinking budget (4096) to avoid truncation",
872            max_tokens
873        );
874    }
875
876    #[test]
877    fn full_rewrite_mode_uses_default_system_prompt() {
878        let summarizer =
879            LlmSummarizer::new(Arc::new(DummyProvider), "model".to_string(), None, None)
880                .with_summary_mode(SummaryMode::FullRewrite);
881        let messages = vec![Message::user("hello"), Message::assistant("hi", None)];
882        let prompts = summarizer.build_summarization_messages(&messages);
883        let system = &prompts[0].content;
884        assert!(
885            system.contains("conversation summarizer"),
886            "FullRewrite prompt should contain 'conversation summarizer'"
887        );
888        assert!(
889            !system.contains("updating an existing"),
890            "FullRewrite prompt should not contain incremental language"
891        );
892    }
893
894    #[test]
895    fn incremental_merge_mode_uses_update_system_prompt() {
896        let summarizer = LlmSummarizer::new(
897            Arc::new(DummyProvider),
898            "model".to_string(),
899            Some("Previous summary content".to_string()),
900            None,
901        )
902        .with_summary_mode(SummaryMode::IncrementalMerge);
903        let messages = vec![Message::user("hello"), Message::assistant("hi", None)];
904        let prompts = summarizer.build_summarization_messages(&messages);
905        let system = &prompts[0].content;
906        assert!(
907            system.contains("updating an existing conversation summary"),
908            "IncrementalMerge prompt should contain 'updating an existing conversation summary'"
909        );
910        assert!(
911            system.contains("Incorporate new information"),
912            "IncrementalMerge prompt should mention incorporating new information"
913        );
914    }
915
916    #[test]
917    fn default_summary_mode_is_full_rewrite() {
918        assert!(matches!(SummaryMode::default(), SummaryMode::FullRewrite));
919    }
920
921    #[test]
922    fn incremental_merge_includes_existing_summary_in_user_content() {
923        let summarizer = LlmSummarizer::new(
924            Arc::new(DummyProvider),
925            "model".to_string(),
926            Some("Previous summary content".to_string()),
927            None,
928        )
929        .with_summary_mode(SummaryMode::IncrementalMerge);
930        let messages = vec![
931            Message::user("new work"),
932            Message::assistant("doing it", None),
933        ];
934        let prompts = summarizer.build_summarization_messages(&messages);
935        let user_content = &prompts[1].content;
936        assert!(
937            user_content.contains("Previous Summary"),
938            "IncrementalMerge user prompt should include the existing summary"
939        );
940        assert!(
941            user_content.contains("Previous summary content"),
942            "IncrementalMerge user prompt should include the actual summary text"
943        );
944    }
945}