1use std::fmt::Write as _;
14use std::sync::Arc;
15use std::time::Duration;
16
17use futures::StreamExt as _;
18use zeph_common::OVERFLOW_NOTICE_PREFIX;
19use zeph_common::memory::AnchoredSummary;
20use zeph_llm::LlmProvider as _;
21use zeph_llm::any::AnyProvider;
22use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
23
24pub trait MessageTokenCounter: Send + Sync {
29 fn count_message_tokens(&self, msg: &Message) -> usize;
31}
32
33#[derive(Clone)]
38pub struct SummarizationDeps {
39 pub provider: AnyProvider,
41 pub llm_timeout: Duration,
43 pub token_counter: Arc<dyn MessageTokenCounter>,
45 pub structured_summaries: bool,
47 #[allow(clippy::type_complexity)]
52 pub on_anchored_summary: Option<Arc<dyn Fn(&AnchoredSummary, bool) + Send + Sync>>,
53}
54
55pub async fn summarize_structured(
64 deps: &SummarizationDeps,
65 messages: &[Message],
66 guidelines: &str,
67) -> Result<AnchoredSummary, zeph_llm::LlmError> {
68 let prompt = build_anchored_summary_prompt(messages, guidelines);
69 let msgs = [Message {
70 role: Role::User,
71 content: prompt,
72 parts: vec![],
73 metadata: MessageMetadata::default(),
74 }];
75 let summary: AnchoredSummary = tokio::time::timeout(
76 deps.llm_timeout,
77 deps.provider.chat_typed_erased::<AnchoredSummary>(&msgs),
78 )
79 .await
80 .map_err(|_| zeph_llm::LlmError::Timeout)??;
81
82 if !summary.files_modified.is_empty() && summary.decisions_made.is_empty() {
83 tracing::warn!("structured summary: decisions_made is empty");
84 } else if summary.files_modified.is_empty() {
85 tracing::warn!(
86 "structured summary: files_modified is empty (may be a pure discussion session)"
87 );
88 }
89
90 if !summary.is_complete() {
91 tracing::warn!(
92 session_intent_empty = summary.session_intent.trim().is_empty(),
93 next_steps_empty = summary.next_steps.is_empty(),
94 "structured summary incomplete: mandatory fields missing, falling back to prose"
95 );
96 return Err(zeph_llm::LlmError::StructuredParse(
97 "structured summary missing mandatory fields".into(),
98 ));
99 }
100
101 if let Err(msg) = summary.validate() {
102 tracing::warn!(
103 error = %msg,
104 "structured summary failed field validation, falling back to prose"
105 );
106 return Err(zeph_llm::LlmError::StructuredParse(msg));
107 }
108
109 Ok(summary)
110}
111
112pub async fn single_pass_summary(
117 deps: &SummarizationDeps,
118 messages: &[Message],
119 guidelines: &str,
120) -> Result<String, zeph_llm::LlmError> {
121 let prompt = build_chunk_prompt(messages, guidelines);
122 let msgs = [Message {
123 role: Role::User,
124 content: prompt,
125 parts: vec![],
126 metadata: MessageMetadata::default(),
127 }];
128 tokio::time::timeout(deps.llm_timeout, deps.provider.chat(&msgs))
129 .await
130 .map_err(|_| zeph_llm::LlmError::Timeout)?
131}
132
133pub async fn summarize_with_llm(
142 deps: &SummarizationDeps,
143 messages: &[Message],
144 guidelines: &str,
145) -> Result<String, zeph_llm::LlmError> {
146 const CHUNK_TOKEN_BUDGET: usize = 4096;
147 const OVERSIZED_THRESHOLD: usize = CHUNK_TOKEN_BUDGET / 2;
148
149 let tc = Arc::clone(&deps.token_counter);
150 let chunks = crate::slot::chunk_messages(
151 messages,
152 CHUNK_TOKEN_BUDGET,
153 OVERSIZED_THRESHOLD,
154 move |msg| tc.count_message_tokens(msg),
155 );
156
157 if chunks.len() <= 1 {
158 return single_pass_summary(deps, messages, guidelines).await;
159 }
160
161 let partial_summaries = run_chunk_summaries(deps, chunks, guidelines).await;
162
163 if partial_summaries.is_empty() {
164 return single_pass_summary(deps, messages, guidelines).await;
165 }
166
167 let numbered = join_partial_summaries(&partial_summaries);
168
169 if deps.structured_summaries
170 && let Some(result) = try_structured_consolidation(deps, &numbered).await
171 {
172 return Ok(result);
173 }
174
175 prose_consolidation(deps, &numbered).await
176}
177
178async fn run_chunk_summaries(
181 deps: &SummarizationDeps,
182 chunks: Vec<Vec<Message>>,
183 guidelines: &str,
184) -> Vec<String> {
185 let provider = deps.provider.clone();
186 let guidelines_owned = guidelines.to_string();
187 let timeout = deps.llm_timeout;
188
189 let results: Vec<_> = futures::stream::iter(chunks.into_iter().map(|chunk| {
190 let guidelines_ref = guidelines_owned.clone();
191 let prompt = build_chunk_prompt(&chunk, &guidelines_ref);
192 let p = provider.clone();
193 async move {
194 tokio::time::timeout(
195 timeout,
196 p.chat(&[Message {
197 role: Role::User,
198 content: prompt,
199 parts: vec![],
200 metadata: MessageMetadata::default(),
201 }]),
202 )
203 .await
204 .map_err(|_| zeph_llm::LlmError::Timeout)?
205 }
206 }))
207 .buffer_unordered(4)
208 .collect()
209 .await;
210
211 results
212 .into_iter()
213 .collect::<Result<Vec<_>, zeph_llm::LlmError>>()
214 .unwrap_or_else(|e| {
215 tracing::warn!(
216 "chunked compaction: one or more chunks failed: {e:#}, falling back to single-pass"
217 );
218 Vec::new()
219 })
220}
221
/// Renders `partials` as a numbered list (`1. …`, `2. …`) with a blank line between entries.
///
/// An empty slice produces an empty string.
fn join_partial_summaries(partials: &[String]) -> String {
    // Eight extra bytes per entry leave room for the "N. " prefix and the separator.
    let capacity: usize = partials.iter().map(|partial| partial.len() + 8).sum();
    let mut joined = String::with_capacity(capacity);

    for (number, partial) in (1usize..).zip(partials) {
        if number > 1 {
            joined.push_str("\n\n");
        }
        // Writing into a `String` cannot fail, so the result is deliberately ignored.
        let _ = write!(joined, "{number}. {partial}");
    }

    joined
}
233
234async fn try_structured_consolidation(deps: &SummarizationDeps, numbered: &str) -> Option<String> {
235 let timeout = deps.llm_timeout;
236 let anchored_prompt = format!(
237 "<analysis>\n\
238 Merge these partial conversation summaries into a single structured summary.\n\
239 </analysis>\n\
240 \n\
241 Produce a JSON object with exactly these 5 fields:\n\
242 - session_intent: string — what the user is trying to accomplish\n\
243 - files_modified: string[] — file paths, function names, structs touched\n\
244 - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
245 - open_questions: string[] — unresolved questions or blockers\n\
246 - next_steps: string[] — concrete next actions\n\
247 \n\
248 Partial summaries:\n{numbered}"
249 );
250 let anchored_msgs = [Message {
251 role: Role::User,
252 content: anchored_prompt,
253 parts: vec![],
254 metadata: MessageMetadata::default(),
255 }];
256 match tokio::time::timeout(
257 timeout,
258 deps.provider
259 .chat_typed_erased::<AnchoredSummary>(&anchored_msgs),
260 )
261 .await
262 {
263 Ok(Ok(anchored)) if anchored.is_complete() => {
264 if let Some(ref cb) = deps.on_anchored_summary {
265 cb(&anchored, false);
266 }
267 Some(crate::slot::cap_summary(anchored.to_markdown(), 16_000))
268 }
269 Ok(Ok(anchored)) => {
270 tracing::warn!(
271 "chunked consolidation: structured summary incomplete, falling back to prose"
272 );
273 if let Some(ref cb) = deps.on_anchored_summary {
274 cb(&anchored, true);
275 }
276 None
277 }
278 Ok(Err(e)) => {
279 tracing::warn!(error = %e, "chunked consolidation: structured output failed, falling back to prose");
280 None
281 }
282 Err(_) => {
283 tracing::warn!(
284 "chunked consolidation: structured output timed out, falling back to prose"
285 );
286 None
287 }
288 }
289}
290
291async fn prose_consolidation(
292 deps: &SummarizationDeps,
293 numbered: &str,
294) -> Result<String, zeph_llm::LlmError> {
295 let timeout = deps.llm_timeout;
296 let consolidation_prompt = format!(
297 "<analysis>\n\
298 Merge these partial conversation summaries into a single structured compaction note.\n\
299 Produce exactly these 9 sections covering all partial summaries:\n\
300 1. User Intent\n2. Technical Concepts\n3. Files & Code\n4. Errors & Fixes\n\
301 5. Problem Solving\n6. User Messages\n7. Pending Tasks\n8. Current Work\n9. Next Step\n\
302 </analysis>\n\n\
303 Partial summaries:\n{numbered}"
304 );
305 let consolidation_msgs = [Message {
306 role: Role::User,
307 content: consolidation_prompt,
308 parts: vec![],
309 metadata: MessageMetadata::default(),
310 }];
311 tokio::time::timeout(timeout, deps.provider.chat(&consolidation_msgs))
312 .await
313 .map_err(|_| zeph_llm::LlmError::Timeout)?
314}
315
316#[must_use]
329pub fn build_chunk_prompt(messages: &[Message], guidelines: &str) -> String {
330 let history_text = format_history(messages);
331
332 let guidelines_section = guidelines_xml(guidelines);
333
334 format!(
335 "<analysis>\n\
336 Analyze this conversation and produce a structured compaction note for self-consumption.\n\
337 This note replaces the original messages in your context window — be thorough.\n\
338 Longer is better if it preserves actionable detail.\n\
339 </analysis>\n\
340 {guidelines_section}\n\
341 Produce exactly these 9 sections:\n\
342 1. User Intent — what the user is ultimately trying to accomplish\n\
343 2. Technical Concepts — key technologies, patterns, constraints discussed\n\
344 3. Files & Code — file paths, function names, structs, enums touched or relevant\n\
345 4. Errors & Fixes — every error encountered and whether/how it was resolved\n\
346 5. Problem Solving — approaches tried, decisions made, alternatives rejected\n\
347 6. User Messages — verbatim user requests that are still pending or relevant\n\
348 7. Pending Tasks — items explicitly promised or left TODO\n\
349 8. Current Work — the exact task in progress at the moment of compaction\n\
350 9. Next Step — the single most important action to take immediately after compaction\n\
351 \n\
352 Conversation:\n{history_text}"
353 )
354}
355
356#[must_use]
369pub fn build_anchored_summary_prompt(messages: &[Message], guidelines: &str) -> String {
370 let history_text = format_history(messages);
371 let guidelines_section = guidelines_xml(guidelines);
372
373 format!(
374 "<analysis>\n\
375 You are compacting a conversation into a structured summary for self-consumption.\n\
376 This summary replaces the original messages in your context window.\n\
377 Every field MUST be populated — empty fields mean lost information.\n\
378 </analysis>\n\
379 {guidelines_section}\n\
380 Produce a JSON object with exactly these 5 fields:\n\
381 - session_intent: string — what the user is trying to accomplish\n\
382 - files_modified: string[] — file paths, function names, structs touched\n\
383 - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
384 - open_questions: string[] — unresolved questions or blockers\n\
385 - next_steps: string[] — concrete next actions\n\
386 \n\
387 Be thorough. Preserve all file paths, line numbers, error messages, \
388 and specific identifiers — they cannot be recovered.\n\
389 \n\
390 Conversation:\n{history_text}"
391 )
392}
393
394#[must_use]
399pub fn build_metadata_summary(messages: &[Message], truncate: fn(&str, usize) -> String) -> String {
400 let mut user_count = 0usize;
401 let mut assistant_count = 0usize;
402 let mut system_count = 0usize;
403 let mut last_user = String::new();
404 let mut last_assistant = String::new();
405
406 for m in messages {
407 match m.role {
408 Role::User => {
409 user_count += 1;
410 if !m.content.is_empty() {
411 last_user.clone_from(&m.content);
412 }
413 }
414 Role::Assistant => {
415 assistant_count += 1;
416 if !m.content.is_empty() {
417 last_assistant.clone_from(&m.content);
418 }
419 }
420 Role::System => system_count += 1,
421 }
422 }
423
424 let last_user_preview = truncate(&last_user, 200);
425 let last_assistant_preview = truncate(&last_assistant, 200);
426
427 format!(
428 "[metadata summary — LLM compaction unavailable]\n\
429 Messages compacted: {} ({} user, {} assistant, {} system)\n\
430 Last user message: {last_user_preview}\n\
431 Last assistant message: {last_assistant_preview}",
432 messages.len(),
433 user_count,
434 assistant_count,
435 system_count,
436 )
437}
438
439#[must_use]
443pub fn build_tool_pair_summary_prompt(req: &Message, res: &Message) -> String {
444 format!(
445 "Produce a concise but technically precise summary of this tool invocation.\n\
446 Preserve all facts that would be needed to continue work without re-running the tool:\n\
447 - Tool name and key input parameters (file paths, function names, patterns, line ranges)\n\
448 - Exact findings: line numbers, struct/enum/function names, error messages, numeric values\n\
449 - Outcome: what was found, changed, created, or confirmed\n\
450 Do NOT omit specific identifiers, paths, or numbers — they cannot be recovered later.\n\
451 Use 2-4 sentences maximum.\n\n\
452 <tool_request>\n{}\n</tool_request>\n\n<tool_response>\n{}\n</tool_response>",
453 req.content, res.content
454 )
455}
456
457#[allow(
465 clippy::cast_precision_loss,
466 clippy::cast_possible_truncation,
467 clippy::cast_sign_loss,
468 clippy::cast_possible_wrap
469)]
470#[must_use]
471pub fn remove_tool_responses_middle_out(mut messages: Vec<Message>, fraction: f32) -> Vec<Message> {
472 let tool_indices: Vec<usize> = messages
473 .iter()
474 .enumerate()
475 .filter(|(_, m)| {
476 m.parts.iter().any(|p| {
477 matches!(
478 p,
479 MessagePart::ToolResult { .. } | MessagePart::ToolOutput { .. }
480 )
481 })
482 })
483 .map(|(i, _)| i)
484 .collect();
485
486 if tool_indices.is_empty() {
487 return messages;
488 }
489
490 let n = tool_indices.len();
491 let to_remove = ((n as f32 * fraction).ceil() as usize).min(n);
492
493 let center = n / 2;
494 let mut remove_set: Vec<usize> = Vec::with_capacity(to_remove);
495 let mut left = center as isize - 1;
496 let mut right = center;
497 let mut count = 0;
498
499 while count < to_remove {
500 if right < n {
501 remove_set.push(tool_indices[right]);
502 count += 1;
503 right += 1;
504 }
505 if count < to_remove && left >= 0 {
506 let idx = left as usize;
507 if !remove_set.contains(&tool_indices[idx]) {
508 remove_set.push(tool_indices[idx]);
509 count += 1;
510 }
511 }
512 left -= 1;
513 if left < 0 && right >= n {
514 break;
515 }
516 }
517
518 for &msg_idx in &remove_set {
519 let msg = &mut messages[msg_idx];
520 for part in &mut msg.parts {
521 match part {
522 MessagePart::ToolResult { content, .. } => {
523 let ref_notice = extract_overflow_ref(content).map_or_else(
524 || String::from("[compacted]"),
525 |uuid| {
526 format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
527 },
528 );
529 *content = ref_notice;
530 }
531 MessagePart::ToolOutput {
532 body, compacted_at, ..
533 } if compacted_at.is_none() => {
534 let ref_notice = extract_overflow_ref(body)
535 .map(|uuid| {
536 format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
537 })
538 .unwrap_or_default();
539 *body = ref_notice;
540 *compacted_at = Some(
541 std::time::SystemTime::now()
542 .duration_since(std::time::UNIX_EPOCH)
543 .unwrap_or_default()
544 .as_secs()
545 .cast_signed(),
546 );
547 }
548 _ => {}
549 }
550 }
551 msg.rebuild_content();
552 }
553 messages
554}
555
556#[must_use]
563pub fn extract_overflow_ref(body: &str) -> Option<&str> {
564 let start = body.find(OVERFLOW_NOTICE_PREFIX)?;
565 let rest = &body[start + OVERFLOW_NOTICE_PREFIX.len()..];
566 let end = rest.find(" \u{2014} ")?;
567 Some(&rest[..end])
568}
569
570fn format_history(messages: &[Message]) -> String {
571 let estimated_len: usize = messages
572 .iter()
573 .map(|m| "[assistant]: ".len() + m.content.len() + 2)
574 .sum();
575 let mut history_text = String::with_capacity(estimated_len);
576 for (i, m) in messages.iter().enumerate() {
577 if i > 0 {
578 history_text.push_str("\n\n");
579 }
580 let role = match m.role {
581 Role::User => "user",
582 Role::Assistant => "assistant",
583 Role::System => "system",
584 };
585 let _ = write!(history_text, "[{role}]: {}", m.content);
586 }
587 history_text
588}
589
/// Wraps `guidelines` in a `<compression-guidelines>` element surrounded by newlines.
///
/// Returns an empty string when `guidelines` is empty so the section is omitted entirely.
fn guidelines_xml(guidelines: &str) -> String {
    match guidelines {
        "" => String::new(),
        text => format!("\n<compression-guidelines>\n{text}\n</compression-guidelines>\n"),
    }
}
597
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{Message, MessageMetadata, Role};

    /// Builds a message with the given role and text and no parts.
    fn text_msg(role: Role, content: &str) -> Message {
        Message {
            role,
            content: content.to_owned(),
            parts: Vec::new(),
            metadata: MessageMetadata::default(),
        }
    }

    fn user_msg(content: &str) -> Message {
        text_msg(Role::User, content)
    }

    fn assistant_msg(content: &str) -> Message {
        text_msg(Role::Assistant, content)
    }

    /// Builds a user message carrying a `ToolUse` part and a matching `ToolResult` part.
    fn tool_result_msg(content: &str) -> Message {
        use zeph_llm::provider::MessagePart;

        let tool_use = MessagePart::ToolUse {
            id: "t1".into(),
            name: "bash".into(),
            input: serde_json::Value::Null,
        };
        let tool_result = MessagePart::ToolResult {
            tool_use_id: "t1".into(),
            content: content.to_owned(),
            is_error: false,
        };

        Message {
            role: Role::User,
            content: content.to_owned(),
            parts: vec![tool_use, tool_result],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn build_chunk_prompt_includes_guidelines_section() {
        let prompt = build_chunk_prompt(&[user_msg("hello")], "be concise");
        assert!(
            prompt.contains("<compression-guidelines>"),
            "prompt must include guidelines XML"
        );
        assert!(
            prompt.contains("be concise"),
            "prompt must embed the guidelines text"
        );
    }

    #[test]
    fn build_chunk_prompt_no_guidelines_omits_section() {
        let no_messages: [Message; 0] = [];
        let prompt = build_chunk_prompt(&no_messages, "");
        assert!(
            !prompt.contains("<compression-guidelines>"),
            "empty guidelines must not produce the XML section"
        );
    }

    #[test]
    fn build_anchored_summary_prompt_contains_json_fields() {
        let no_messages: [Message; 0] = [];
        let prompt = build_anchored_summary_prompt(&no_messages, "");
        assert!(prompt.contains("session_intent"));
        assert!(prompt.contains("files_modified"));
        assert!(prompt.contains("next_steps"));
    }

    #[test]
    fn build_metadata_summary_counts_messages() {
        fn first_chars(text: &str, limit: usize) -> String {
            text.chars().take(limit).collect()
        }

        let conversation = [user_msg("hi"), assistant_msg("hello"), user_msg("bye")];
        let summary = build_metadata_summary(&conversation, first_chars);
        assert!(summary.contains("3 (2 user, 1 assistant, 0 system)"));
    }

    #[test]
    fn build_tool_pair_summary_prompt_contains_request_and_response() {
        let prompt =
            build_tool_pair_summary_prompt(&user_msg("req content"), &assistant_msg("res content"));
        assert!(prompt.contains("req content"));
        assert!(prompt.contains("res content"));
    }

    #[test]
    fn extract_overflow_ref_returns_uuid_when_present() {
        let uuid = "550e8400-e29b-41d4-a716-446655440000";
        let body =
            format!("some output\n[full output stored \u{2014} ID: {uuid} \u{2014} 12345 bytes]");
        assert_eq!(extract_overflow_ref(&body), Some(uuid));
    }

    #[test]
    fn extract_overflow_ref_returns_none_when_absent() {
        assert_eq!(extract_overflow_ref("normal output"), None);
    }

    #[test]
    fn remove_tool_responses_middle_out_clears_correct_fraction() {
        let input: Vec<Message> = ["out0", "out1", "out2", "out3"]
            .into_iter()
            .map(tool_result_msg)
            .collect();

        let output = remove_tool_responses_middle_out(input, 0.5);

        let compacted_count = output
            .iter()
            .flat_map(|msg| msg.parts.iter())
            .filter(|part| {
                matches!(
                    part,
                    zeph_llm::provider::MessagePart::ToolResult { content, .. }
                        if content == "[compacted]"
                )
            })
            .count();

        assert_eq!(
            compacted_count, 2,
            "ceil(4 * 0.5) = 2 tool results must be replaced with [compacted]"
        );
    }

    #[test]
    fn remove_tool_responses_middle_out_no_tool_messages_returns_unchanged() {
        let input = vec![user_msg("hello"), assistant_msg("hi")];
        let expected_len = input.len();

        let result = remove_tool_responses_middle_out(input, 0.5);

        assert_eq!(result.len(), expected_len);
        assert!(
            result.iter().all(|m| m.parts.is_empty()),
            "non-tool messages must be unchanged"
        );
    }
}