1use async_trait::async_trait;
7use bamboo_agent_core::{Message, Role};
8use bamboo_domain::ReasoningEffort;
9use bamboo_infrastructure::LLMChunk;
10use bamboo_infrastructure::{LLMProvider, LLMRequestOptions};
11use futures::StreamExt;
12use std::collections::HashSet;
13use std::sync::Arc;
14
15#[async_trait]
17pub trait Summarizer: Send + Sync {
18 async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError>;
22
23 fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
27 (message_count * 50).min(1000) as u32
29 }
30}
31
/// Summarizer that assembles its output directly from the messages it is given,
/// without calling an LLM.
#[derive(Debug, Default)]
pub struct HeuristicSummarizer;
42
43impl HeuristicSummarizer {
44 pub fn new() -> Self {
46 Self
47 }
48
49 fn extract_user_questions<'a>(&self, messages: &'a [Message]) -> Vec<&'a str> {
51 messages
52 .iter()
53 .filter(|m| m.role == Role::User)
54 .filter(|m| !m.content.is_empty())
55 .take(10) .map(|m| m.content.as_str())
57 .collect()
58 }
59
60 fn extract_tools_used(&self, messages: &[Message]) -> Vec<String> {
62 let mut tools = HashSet::new();
63
64 for message in messages {
65 if let Some(ref tool_calls) = message.tool_calls {
66 for call in tool_calls {
67 tools.insert(call.function.name.clone());
68 }
69 }
70 }
71
72 let mut result: Vec<String> = tools.into_iter().collect();
73 result.sort();
74 result
75 }
76
77 fn extract_key_responses<'a>(&self, messages: &'a [Message]) -> Vec<&'a str> {
79 messages
80 .iter()
81 .filter(|m| m.role == Role::Assistant)
82 .filter(|m| !m.content.is_empty())
83 .rev() .take(3)
85 .map(|m| m.content.as_str())
86 .collect()
87 }
88
89 fn safe_truncate(&self, s: &str, max_chars: usize) -> String {
92 if s.chars().count() <= max_chars {
93 return s.to_string();
94 }
95
96 let truncated: String = s.chars().take(max_chars).collect();
98 format!("{}...", truncated)
99 }
100}
101
102#[async_trait]
103impl Summarizer for HeuristicSummarizer {
104 async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError> {
105 if messages.is_empty() {
106 return Ok("No conversation history.".to_string());
107 }
108
109 let questions = self.extract_user_questions(messages);
110 let tools = self.extract_tools_used(messages);
111 let responses = self.extract_key_responses(messages);
112
113 let mut summary_parts = Vec::new();
114
115 if !questions.is_empty() {
117 summary_parts.push("## User Requests".to_string());
118 for (i, q) in questions.iter().enumerate() {
119 let truncated = self.safe_truncate(q, 200);
121 summary_parts.push(format!("{}. {}", i + 1, truncated));
122 }
123 }
124
125 if !tools.is_empty() {
127 summary_parts.push("\n## Tools Used".to_string());
128 for tool in tools {
129 summary_parts.push(format!("- {}", tool));
130 }
131 }
132
133 if !responses.is_empty() {
135 summary_parts.push("\n## Key Outcomes".to_string());
136 for (i, r) in responses.iter().enumerate() {
137 let truncated = self.safe_truncate(r, 300);
139 summary_parts.push(format!("{}. {}", i + 1, truncated));
140 }
141 }
142
143 if summary_parts.is_empty() {
144 Ok("Previous conversation context available.".to_string())
145 } else {
146 Ok(summary_parts.join("\n"))
147 }
148 }
149}
150
/// Condition under which a [`SummaryManager`] reports that summarization should run.
#[derive(Debug, Clone)]
pub enum SummaryTrigger {
    /// Fires when the caller reports that truncation occurred.
    OnTruncation,
    /// Fires once the number of messages is at least `interval`.
    Periodic { interval: usize },
    /// Fires once the current token count is at least `threshold`.
    TokenThreshold { threshold: u32 },
}
161
/// Pairs a boxed [`Summarizer`] with the [`SummaryTrigger`] that decides when to use it.
pub struct SummaryManager {
    /// Summarizer that all summarization calls are delegated to.
    summarizer: Box<dyn Summarizer>,
    /// Condition evaluated by `should_summarize`.
    trigger: SummaryTrigger,
}
167
168impl std::fmt::Debug for SummaryManager {
169 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170 f.debug_struct("SummaryManager")
171 .field("trigger", &self.trigger)
172 .finish_non_exhaustive()
173 }
174}
175
176impl SummaryManager {
177 pub fn new(summarizer: impl Summarizer + 'static, trigger: SummaryTrigger) -> Self {
179 Self {
180 summarizer: Box::new(summarizer),
181 trigger,
182 }
183 }
184
185 pub fn should_summarize(
187 &self,
188 messages: &[Message],
189 _truncation_occurred: bool,
190 current_token_count: u32,
191 ) -> bool {
192 match &self.trigger {
193 SummaryTrigger::OnTruncation => _truncation_occurred,
194 SummaryTrigger::Periodic { interval } => messages.len() >= *interval,
195 SummaryTrigger::TokenThreshold { threshold } => current_token_count >= *threshold,
196 }
197 }
198
199 pub async fn summarize(
201 &self,
202 messages: &[Message],
203 ) -> Result<String, crate::types::BudgetError> {
204 self.summarizer.summarize(messages).await
205 }
206
207 pub fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
209 self.summarizer.estimate_summary_tokens(message_count)
210 }
211}
212
/// Selects which system prompt [`LlmSummarizer`] sends with a summarization request.
#[derive(Debug, Clone, Default)]
pub enum SummaryMode {
    /// Uses the "conversation summarizer" prompt that writes a summary from scratch.
    /// This is the default mode.
    #[default]
    FullRewrite,
    /// Uses the prompt that asks the model to update an existing summary with
    /// information from recent messages.
    IncrementalMerge,
}
222
/// Summarizer that asks an LLM provider to write the summary.
pub struct LlmSummarizer {
    /// Provider used for the streaming chat call.
    llm: Arc<dyn LLMProvider>,
    /// Model name passed to the provider.
    model: String,
    /// Earlier summary text, placed under a "Previous Summary" heading in the prompt.
    existing_summary: Option<String>,
    /// Task list text, placed under a "Current Task List" heading when non-blank.
    task_list_prompt: Option<String>,
    /// Extra instructions, placed under a "Custom Compression Instructions"
    /// heading when non-blank.
    custom_instructions: Option<String>,
    /// Chooses the system prompt used for the request.
    summary_mode: SummaryMode,
}
240
241impl LlmSummarizer {
242 pub fn new(
243 llm: Arc<dyn LLMProvider>,
244 model: String,
245 existing_summary: Option<String>,
246 task_list_prompt: Option<String>,
247 ) -> Self {
248 Self {
249 llm,
250 model,
251 existing_summary,
252 task_list_prompt,
253 custom_instructions: None,
254 summary_mode: SummaryMode::default(),
255 }
256 }
257
258 pub fn with_custom_instructions(mut self, instructions: Option<String>) -> Self {
259 self.custom_instructions = instructions;
260 self
261 }
262
263 pub fn with_summary_mode(mut self, mode: SummaryMode) -> Self {
264 self.summary_mode = mode;
265 self
266 }
267
268 fn build_summarization_messages(&self, messages: &[Message]) -> Vec<Message> {
270 let mut prompt_messages = Vec::new();
271
272 let system_prompt = match self.summary_mode {
273 SummaryMode::FullRewrite => {
274 r#"You are a conversation summarizer. Your task is to create a concise but reliable working-memory summary for a conversation that was removed due to context window limits.
275
276Guidelines:
277- First capture the in-flight work right before compression (what was being done, where, and with which tool/file)
278- Distinguish clearly between CURRENT ACTIVE work, COMPLETED work, and OBSOLETE or superseded work
279- Do not restate old tasks as active unless they are still unresolved
280- The provided current task list is the source of truth for active work
281- Preserve key decisions, constraints, file paths, code changes, tool findings, blockers, and important outcomes
282- Preserve error messages, test results (pass/fail counts), and function/variable names that are relevant to active work
283- If earlier plans conflict with newer messages or the current task list, mark them as obsolete or completed
284- Explicitly evaluate each clear user requirement (e.g. requirement 1, requirement 2) with a status and evidence
285- Keep the next step specific and aligned with the active work only
286- Use structured sections
287- Write in the same language as the original conversation"#
288 }
289 SummaryMode::IncrementalMerge => {
290 r#"You are updating an existing conversation summary with new information from recent messages.
291
292Guidelines:
293- Incorporate new information into the existing summary structure
294- Mark previously active work as completed if the new messages confirm completion
295- Remove or condense information that is no longer relevant
296- Preserve all key decisions, file paths, and constraints that remain active
297- If new messages conflict with the existing summary, the new messages take precedence
298- Keep the summary focused on what is currently active and relevant
299- The provided current task list is the source of truth for active work
300- Maintain the same structured sections as the existing summary
301- Write in the same language as the original conversation
302- Be concise: avoid repeating information already well-captured in the existing summary"#
303 }
304 };
305
306 prompt_messages.push(Message::system(system_prompt));
307
308 let mut user_content = String::new();
309
310 if let Some(ref existing) = self.existing_summary {
311 user_content.push_str("## Previous Summary\n\n");
312 user_content.push_str(existing);
313 user_content.push_str("\n\n---\n\n");
314 }
315
316 if let Some(task_list_prompt) = self
317 .task_list_prompt
318 .as_deref()
319 .map(str::trim)
320 .filter(|value| !value.is_empty())
321 {
322 user_content.push_str("## Current Task List\n\n");
323 user_content.push_str(task_list_prompt);
324 user_content.push_str("\n\n---\n\n");
325 }
326
327 if let Some(ref instructions) = self.custom_instructions {
328 if !instructions.trim().is_empty() {
329 user_content.push_str("## Custom Compression Instructions\n\n");
330 user_content.push_str(instructions.trim());
331 user_content.push_str("\n\n---\n\n");
332 }
333 }
334
335 user_content.push_str(
336 "## Required Output Sections\n1. Pre-compression in-flight work (what was being done immediately before compression)\n2. Current active objective\n3. Requirement checklist (Requirement | Status: completed/in_progress/pending/blocked/obsolete | Evidence)\n4. Active tasks\n5. Completed tasks\n6. Obsolete or superseded tasks\n7. Important context and constraints\n8. Files, code, and tool findings\n9. Open issues and next step\n\n",
337 );
338
339 user_content.push_str("## Messages to Summarize\n\n");
340
341 for message in messages {
342 let role_label = match message.role {
343 Role::User => "User",
344 Role::Assistant => "Assistant",
345 Role::Tool => "Tool Result",
346 Role::System => continue,
347 };
348
349 if let Some(ref tool_calls) = message.tool_calls {
350 if !tool_calls.is_empty() {
351 let tool_names: Vec<&str> = tool_calls
352 .iter()
353 .map(|tc| tc.function.name.as_str())
354 .collect();
355 user_content.push_str(&format!(
356 "**{}** [called tools: {}]:\n",
357 role_label,
358 tool_names.join(", ")
359 ));
360 } else {
361 user_content.push_str(&format!("**{}**:\n", role_label));
362 }
363 } else {
364 user_content.push_str(&format!("**{}**:\n", role_label));
365 }
366
367 if let Some(ref tool_call_id) = message.tool_call_id {
368 user_content.push_str(&format!("(tool_call_id: {})\n", tool_call_id));
369 }
370
371 let content = &message.content;
372 const MAX_CONTENT_CHARS: usize = 2000;
373 if content.chars().count() > MAX_CONTENT_CHARS {
374 let truncated: String = content.chars().take(MAX_CONTENT_CHARS).collect();
375 user_content.push_str(&truncated);
376 user_content.push_str("... [truncated]\n\n");
377 } else {
378 user_content.push_str(content);
379 user_content.push_str("\n\n");
380 }
381 }
382
383 user_content.push_str(
384 "\n---\n\nReturn only the summary text. Be explicit about what is active now versus what is already completed or no longer relevant.",
385 );
386
387 prompt_messages.push(Message::user(user_content));
388
389 prompt_messages
390 }
391
392 async fn collect_stream_response(
394 &self,
395 messages: &[Message],
396 ) -> Result<String, crate::types::BudgetError> {
397 let options = LLMRequestOptions {
400 session_id: None,
401 reasoning_effort: Some(ReasoningEffort::High),
402 parallel_tool_calls: None,
403 responses: None,
404 request_purpose: Some("compression".to_string()),
405 };
406 let stream = self
407 .llm
408 .chat_stream_with_options(messages, &[], Some(8192), &self.model, Some(&options))
409 .await
410 .map_err(|e| {
411 crate::types::BudgetError::TokenCountError(format!(
412 "LLM summarization call failed: {}",
413 e
414 ))
415 })?;
416
417 let mut content = String::new();
418 let mut stream = stream;
419
420 while let Some(chunk_result) = stream.next().await {
421 match chunk_result {
422 Ok(LLMChunk::Token(text)) => content.push_str(&text),
423 Ok(LLMChunk::Done) => break,
424 Ok(_) => {} Err(e) => {
426 tracing::warn!("LLM summarization stream error: {}", e);
427 if !content.is_empty() {
428 break;
429 }
430 return Err(crate::types::BudgetError::TokenCountError(format!(
431 "LLM summarization stream failed: {}",
432 e
433 )));
434 }
435 }
436 }
437
438 Ok(content)
439 }
440}
441
442impl std::fmt::Debug for LlmSummarizer {
443 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444 f.debug_struct("LlmSummarizer")
445 .field("model", &self.model)
446 .field("has_existing_summary", &self.existing_summary.is_some())
447 .finish()
448 }
449}
450
451#[async_trait]
452impl Summarizer for LlmSummarizer {
453 async fn summarize(&self, messages: &[Message]) -> Result<String, crate::types::BudgetError> {
454 if messages.is_empty() {
455 return Ok("No conversation history to summarize.".to_string());
456 }
457
458 let prompt_messages = self.build_summarization_messages(messages);
459
460 tracing::info!(
461 "LlmSummarizer: summarizing {} messages using model '{}' (existing_summary={})",
462 messages.len(),
463 self.model,
464 self.existing_summary.is_some()
465 );
466
467 match self.collect_stream_response(&prompt_messages).await {
468 Ok(summary) if !summary.trim().is_empty() => {
469 tracing::info!("LlmSummarizer: generated summary ({} chars)", summary.len());
470 Ok(summary)
471 }
472 Ok(_) => {
473 tracing::warn!(
474 "LlmSummarizer: LLM returned empty summary, falling back to heuristic"
475 );
476 HeuristicSummarizer::new().summarize(messages).await
477 }
478 Err(e) => {
479 tracing::warn!(
480 "LlmSummarizer: LLM call failed ({}), falling back to heuristic",
481 e
482 );
483 HeuristicSummarizer::new().summarize(messages).await
484 }
485 }
486 }
487
488 fn estimate_summary_tokens(&self, message_count: usize) -> u32 {
489 (message_count * 80).min(2000) as u32
491 }
492}
493
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use bamboo_domain::ReasoningEffort;
    use bamboo_infrastructure::{LLMChunk, LLMError, LLMRequestOptions, LLMStream};
    use futures::stream;
    use std::sync::Mutex;

    /// Provider stub whose stream yields one fixed token followed by `Done`.
    struct DummyProvider;

    #[async_trait]
    impl LLMProvider for DummyProvider {
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[bamboo_agent_core::ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            Ok(Box::pin(stream::iter(vec![
                Ok::<LLMChunk, LLMError>(LLMChunk::Token("dummy summary".to_string())),
                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
            ])))
        }
    }

    #[test]
    fn heuristic_summarizer_extracts_user_questions() {
        let summarizer = HeuristicSummarizer::new();
        let messages = vec![
            Message::user("What is the weather?"),
            Message::assistant("It's sunny.", None),
            Message::user("What about tomorrow?"),
        ];

        let questions = summarizer.extract_user_questions(&messages);
        assert_eq!(questions.len(), 2);
        assert!(questions[0].contains("weather"));
    }

    #[test]
    fn heuristic_summarizer_extracts_tools_used() {
        use bamboo_agent_core::{FunctionCall, ToolCall};

        let summarizer = HeuristicSummarizer::new();
        let tool_call = ToolCall {
            id: "call_1".to_string(),
            tool_type: "function".to_string(),
            function: FunctionCall {
                name: "search".to_string(),
                arguments: "{}".to_string(),
            },
        };

        let messages = vec![
            Message::user("Search for something"),
            Message::assistant("I'll search", Some(vec![tool_call])),
        ];

        let tools = summarizer.extract_tools_used(&messages);
        assert_eq!(tools, vec!["search"]);
    }

    #[test]
    fn heuristic_summarizer_extracts_key_responses() {
        let summarizer = HeuristicSummarizer::new();
        let messages = vec![
            Message::user("Hello"),
            Message::assistant("First response", None),
            Message::user("How are you?"),
            Message::assistant("Most recent response", None),
        ];

        let responses = summarizer.extract_key_responses(&messages);
        // Responses come back newest first.
        assert_eq!(responses[0], "Most recent response");
    }

    #[tokio::test]
    async fn heuristic_summarizer_generates_summary() {
        let summarizer = HeuristicSummarizer::new();
        let messages = vec![
            Message::user("What is Rust?"),
            Message::assistant("Rust is a systems programming language.", None),
        ];

        let summary = summarizer.summarize(&messages).await.unwrap();
        assert!(summary.contains("User Requests"));
        assert!(summary.contains("What is Rust?"));
    }

    #[test]
    fn summary_trigger_on_truncation() {
        let manager =
            SummaryManager::new(HeuristicSummarizer::new(), SummaryTrigger::OnTruncation);
        let messages = vec![Message::user("Test")];

        // Only the truncation flag matters for this trigger.
        assert!(manager.should_summarize(&messages, true, 0));
        assert!(!manager.should_summarize(&messages, false, u32::MAX));
    }

    #[test]
    fn summary_trigger_periodic() {
        let manager = SummaryManager::new(
            HeuristicSummarizer::new(),
            SummaryTrigger::Periodic { interval: 5 },
        );
        let messages: Vec<Message> = (0..5).map(|_| Message::user("Test")).collect();

        // Fires once the message count reaches the interval, not before.
        assert!(manager.should_summarize(&messages, false, 0));
        assert!(!manager.should_summarize(&messages[..4], false, 0));
    }

    #[test]
    fn summary_trigger_token_threshold() {
        let manager = SummaryManager::new(
            HeuristicSummarizer::new(),
            SummaryTrigger::TokenThreshold { threshold: 1000 },
        );

        // Fires once the token count reaches the threshold, not before.
        assert!(manager.should_summarize(&[], false, 1000));
        assert!(!manager.should_summarize(&[], false, 999));
    }

    #[test]
    fn safe_truncate_handles_ascii() {
        let summarizer = HeuristicSummarizer::new();
        let text = "Hello world this is a test";
        let truncated = summarizer.safe_truncate(text, 10);

        assert!(truncated.ends_with("..."));
        assert!(truncated.chars().count() <= 13);
    }

    #[test]
    fn safe_truncate_handles_unicode() {
        let summarizer = HeuristicSummarizer::new();

        let text = "Hello 😀🎉🚀 World with emoji";
        let truncated = summarizer.safe_truncate(text, 10);

        assert!(truncated.ends_with("..."));
        assert!(truncated.chars().count() <= 13);
    }

    #[test]
    fn safe_truncate_handles_cjk() {
        let summarizer = HeuristicSummarizer::new();

        let text = "这是一个中文测试消息用于验证截断";
        let truncated = summarizer.safe_truncate(text, 10);

        assert!(truncated.ends_with("..."));
        assert!(truncated.chars().count() <= 13);
    }

    #[test]
    fn safe_truncate_handles_mixed_unicode() {
        let summarizer = HeuristicSummarizer::new();

        let text = "Hello 世界 🌍 test message";
        let truncated = summarizer.safe_truncate(text, 8);

        assert!(truncated.ends_with("..."));
        assert!(truncated.chars().count() <= 11);
    }

    #[tokio::test]
    async fn summarizer_handles_unicode_messages() {
        let summarizer = HeuristicSummarizer::new();

        let long_unicode =
            "这是一段很长的中文消息需要被截断以测试我们的安全截断功能 😀🎉🚀".repeat(10);
        let messages = vec![
            Message::user(&long_unicode),
            Message::assistant("Response", None),
        ];

        let summary = summarizer.summarize(&messages).await.unwrap();
        assert!(summary.contains("User Requests"));
    }

    #[test]
    fn safe_truncate_returns_short_text_unchanged() {
        let summarizer = HeuristicSummarizer::new();
        let text = "Short";
        let truncated = summarizer.safe_truncate(text, 100);

        assert_eq!(truncated, text);
    }

    #[test]
    fn llm_summarizer_prompt_includes_task_list_and_state_sections() {
        let summarizer = LlmSummarizer::new(
            Arc::new(DummyProvider),
            "gpt-4o-mini".to_string(),
            Some("Earlier summary".to_string()),
            Some(
                "## Current Task List\n[/] task_1: Fix compression bounce\n[x] task_0: Analyze bug"
                    .to_string(),
            ),
        );
        let messages = vec![
            Message::user("继续做压缩修复"),
            Message::assistant("我先检查 trigger 与 target", None),
        ];

        let prompt_messages = summarizer.build_summarization_messages(&messages);
        assert_eq!(prompt_messages.len(), 2);
        assert_eq!(prompt_messages[0].role, Role::System);
        assert!(prompt_messages[1].content.contains("## Current Task List"));
        assert!(prompt_messages[1]
            .content
            .contains("Current active objective"));
        assert!(prompt_messages[1].content.contains("Requirement checklist"));
        assert!(prompt_messages[1].content.contains("Active tasks"));
        assert!(prompt_messages[1].content.contains("Completed tasks"));
        assert!(prompt_messages[1]
            .content
            .contains("Obsolete or superseded tasks"));
        assert!(prompt_messages[1].content.contains("Earlier summary"));
    }

    /// Provider stub that records the reasoning effort of every request it receives.
    #[derive(Default)]
    struct ReasoningCaptureProvider {
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
    }

    #[async_trait]
    impl LLMProvider for ReasoningCaptureProvider {
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[bamboo_agent_core::ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            Ok(Box::pin(stream::iter(vec![
                Ok::<LLMChunk, LLMError>(LLMChunk::Token("captured summary".to_string())),
                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
            ])))
        }

        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[bamboo_agent_core::ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            self.captured_reasoning
                .lock()
                .expect("captured reasoning lock should not be poisoned")
                .push(options.and_then(|o| o.reasoning_effort));
            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    #[tokio::test]
    async fn llm_summarizer_requests_high_reasoning_effort_for_summary_calls() {
        let provider = Arc::new(ReasoningCaptureProvider::default());
        let summarizer = LlmSummarizer::new(
            provider.clone(),
            "gpt-5-mini".to_string(),
            None,
            Some("task list".to_string()),
        );
        let messages = vec![
            Message::user("请总结最近三轮"),
            Message::assistant("已完成第一步并准备第二步", None),
        ];

        let summary = summarizer
            .summarize(&messages)
            .await
            .expect("summary generation should succeed");
        assert_eq!(summary, "captured summary");

        let captured = provider
            .captured_reasoning
            .lock()
            .expect("captured reasoning lock should not be poisoned");
        assert_eq!(captured.as_slice(), [Some(ReasoningEffort::High)]);
    }

    /// Provider stub that records both the reasoning effort and the output
    /// token limit of every request it receives.
    #[derive(Default)]
    struct RequestOptionsCaptureProvider {
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
        captured_max_tokens: Mutex<Vec<Option<u32>>>,
    }

    #[async_trait]
    impl LLMProvider for RequestOptionsCaptureProvider {
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[bamboo_agent_core::ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            Ok(Box::pin(stream::iter(vec![
                Ok::<LLMChunk, LLMError>(LLMChunk::Token("captured summary".to_string())),
                Ok::<LLMChunk, LLMError>(LLMChunk::Done),
            ])))
        }

        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[bamboo_agent_core::ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            self.captured_reasoning
                .lock()
                .expect("lock should not be poisoned")
                .push(options.and_then(|o| o.reasoning_effort));
            self.captured_max_tokens
                .lock()
                .expect("lock should not be poisoned")
                .push(max_output_tokens);
            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    #[tokio::test]
    async fn llm_summarizer_sufficient_max_tokens_for_high_reasoning() {
        let provider = Arc::new(RequestOptionsCaptureProvider::default());
        let summarizer = LlmSummarizer::new(
            provider.clone(),
            "gpt-5-mini".to_string(),
            None,
            Some("task list".to_string()),
        );
        let messages = vec![
            Message::user("请总结最近三轮"),
            Message::assistant("已完成第一步并准备第二步", None),
        ];

        let summary = summarizer
            .summarize(&messages)
            .await
            .expect("summary generation should succeed");
        assert_eq!(summary, "captured summary");

        let captured_reasoning = provider
            .captured_reasoning
            .lock()
            .expect("lock should not be poisoned");
        let captured_max_tokens = provider
            .captured_max_tokens
            .lock()
            .expect("lock should not be poisoned");
        assert_eq!(captured_reasoning.as_slice(), [Some(ReasoningEffort::High)]);
        let max_tokens = captured_max_tokens[0].expect("max_output_tokens should be set");
        assert!(
            max_tokens > 4096,
            "max_output_tokens ({}) must exceed thinking budget (4096) to avoid truncation",
            max_tokens
        );
    }

    #[test]
    fn full_rewrite_mode_uses_default_system_prompt() {
        let summarizer =
            LlmSummarizer::new(Arc::new(DummyProvider), "model".to_string(), None, None)
                .with_summary_mode(SummaryMode::FullRewrite);
        let messages = vec![Message::user("hello"), Message::assistant("hi", None)];
        let prompts = summarizer.build_summarization_messages(&messages);
        let system = &prompts[0].content;
        assert!(
            system.contains("conversation summarizer"),
            "FullRewrite prompt should contain 'conversation summarizer'"
        );
        assert!(
            !system.contains("updating an existing"),
            "FullRewrite prompt should not contain incremental language"
        );
    }

    #[test]
    fn incremental_merge_mode_uses_update_system_prompt() {
        let summarizer = LlmSummarizer::new(
            Arc::new(DummyProvider),
            "model".to_string(),
            Some("Previous summary content".to_string()),
            None,
        )
        .with_summary_mode(SummaryMode::IncrementalMerge);
        let messages = vec![Message::user("hello"), Message::assistant("hi", None)];
        let prompts = summarizer.build_summarization_messages(&messages);
        let system = &prompts[0].content;
        assert!(
            system.contains("updating an existing conversation summary"),
            "IncrementalMerge prompt should contain 'updating an existing conversation summary'"
        );
        assert!(
            system.contains("Incorporate new information"),
            "IncrementalMerge prompt should mention incorporating new information"
        );
    }

    #[test]
    fn default_summary_mode_is_full_rewrite() {
        assert!(matches!(SummaryMode::default(), SummaryMode::FullRewrite));
    }

    #[test]
    fn incremental_merge_includes_existing_summary_in_user_content() {
        let summarizer = LlmSummarizer::new(
            Arc::new(DummyProvider),
            "model".to_string(),
            Some("Previous summary content".to_string()),
            None,
        )
        .with_summary_mode(SummaryMode::IncrementalMerge);
        let messages = vec![
            Message::user("new work"),
            Message::assistant("doing it", None),
        ];
        let prompts = summarizer.build_summarization_messages(&messages);
        let user_content = &prompts[1].content;
        assert!(
            user_content.contains("Previous Summary"),
            "IncrementalMerge user prompt should include the existing summary"
        );
        assert!(
            user_content.contains("Previous summary content"),
            "IncrementalMerge user prompt should include the actual summary text"
        );
    }
}