1use std::fmt::Write as _;
14use std::sync::Arc;
15use std::time::Duration;
16
17use futures::StreamExt as _;
18use zeph_common::OVERFLOW_NOTICE_PREFIX;
19use zeph_llm::LlmProvider as _;
20use zeph_llm::any::AnyProvider;
21use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
22use zeph_memory::{AnchoredSummary, TokenCounter};
23
/// Shared dependencies for LLM-driven conversation summarization.
#[derive(Clone)]
pub struct SummarizationDeps {
    /// LLM backend used for all summarization calls.
    pub provider: AnyProvider,
    /// Upper bound applied to each individual LLM request.
    pub llm_timeout: Duration,
    /// Token counter used to split conversations into chunk budgets.
    pub token_counter: Arc<TokenCounter>,
    /// When `true`, prefer structured (JSON `AnchoredSummary`) output over prose.
    pub structured_summaries: bool,
    /// Optional observer invoked with each structured summary produced during
    /// chunked consolidation; the `bool` is `true` when the summary was
    /// incomplete and the prose fallback path is taken, `false` on success.
    #[allow(clippy::type_complexity)]
    pub on_anchored_summary: Option<Arc<dyn Fn(&AnchoredSummary, bool) + Send + Sync>>,
}
45
46pub async fn summarize_structured(
55 deps: &SummarizationDeps,
56 messages: &[Message],
57 guidelines: &str,
58) -> Result<AnchoredSummary, zeph_llm::LlmError> {
59 let prompt = build_anchored_summary_prompt(messages, guidelines);
60 let msgs = [Message {
61 role: Role::User,
62 content: prompt,
63 parts: vec![],
64 metadata: MessageMetadata::default(),
65 }];
66 let summary: AnchoredSummary = tokio::time::timeout(
67 deps.llm_timeout,
68 deps.provider.chat_typed_erased::<AnchoredSummary>(&msgs),
69 )
70 .await
71 .map_err(|_| zeph_llm::LlmError::Timeout)??;
72
73 if !summary.files_modified.is_empty() && summary.decisions_made.is_empty() {
74 tracing::warn!("structured summary: decisions_made is empty");
75 } else if summary.files_modified.is_empty() {
76 tracing::warn!(
77 "structured summary: files_modified is empty (may be a pure discussion session)"
78 );
79 }
80
81 if !summary.is_complete() {
82 tracing::warn!(
83 session_intent_empty = summary.session_intent.trim().is_empty(),
84 next_steps_empty = summary.next_steps.is_empty(),
85 "structured summary incomplete: mandatory fields missing, falling back to prose"
86 );
87 return Err(zeph_llm::LlmError::Other(
88 "structured summary missing mandatory fields".into(),
89 ));
90 }
91
92 if let Err(msg) = summary.validate() {
93 tracing::warn!(
94 error = %msg,
95 "structured summary failed field validation, falling back to prose"
96 );
97 return Err(zeph_llm::LlmError::Other(msg));
98 }
99
100 Ok(summary)
101}
102
103pub async fn single_pass_summary(
108 deps: &SummarizationDeps,
109 messages: &[Message],
110 guidelines: &str,
111) -> Result<String, zeph_llm::LlmError> {
112 let prompt = build_chunk_prompt(messages, guidelines);
113 let msgs = [Message {
114 role: Role::User,
115 content: prompt,
116 parts: vec![],
117 metadata: MessageMetadata::default(),
118 }];
119 tokio::time::timeout(deps.llm_timeout, deps.provider.chat(&msgs))
120 .await
121 .map_err(|_| zeph_llm::LlmError::Timeout)?
122}
123
124#[allow(clippy::too_many_lines)]
133pub async fn summarize_with_llm(
134 deps: &SummarizationDeps,
135 messages: &[Message],
136 guidelines: &str,
137) -> Result<String, zeph_llm::LlmError> {
138 const CHUNK_TOKEN_BUDGET: usize = 4096;
139 const OVERSIZED_THRESHOLD: usize = CHUNK_TOKEN_BUDGET / 2;
140
141 let chunks = crate::slot::chunk_messages(
142 messages,
143 CHUNK_TOKEN_BUDGET,
144 OVERSIZED_THRESHOLD,
145 &deps.token_counter,
146 );
147
148 if chunks.len() <= 1 {
149 return single_pass_summary(deps, messages, guidelines).await;
150 }
151
152 let provider = deps.provider.clone();
154 let guidelines_owned = guidelines.to_string();
155 let timeout = deps.llm_timeout;
156 let results: Vec<_> = futures::stream::iter(chunks.into_iter().map(|chunk| {
157 let guidelines_ref = guidelines_owned.clone();
158 let prompt = build_chunk_prompt(&chunk, &guidelines_ref);
159 let p = provider.clone();
160 async move {
161 tokio::time::timeout(
162 timeout,
163 p.chat(&[Message {
164 role: Role::User,
165 content: prompt,
166 parts: vec![],
167 metadata: MessageMetadata::default(),
168 }]),
169 )
170 .await
171 .map_err(|_| zeph_llm::LlmError::Timeout)?
172 }
173 }))
174 .buffer_unordered(4)
175 .collect()
176 .await;
177
178 let partial_summaries: Vec<String> = results
179 .into_iter()
180 .collect::<Result<Vec<_>, zeph_llm::LlmError>>()
181 .unwrap_or_else(|e| {
182 tracing::warn!(
183 "chunked compaction: one or more chunks failed: {e:#}, falling back to single-pass"
184 );
185 Vec::new()
186 });
187
188 if partial_summaries.is_empty() {
189 return single_pass_summary(deps, messages, guidelines).await;
190 }
191
192 let numbered = {
194 let cap: usize = partial_summaries.iter().map(|s| s.len() + 8).sum();
195 let mut buf = String::with_capacity(cap);
196 for (i, s) in partial_summaries.iter().enumerate() {
197 if i > 0 {
198 buf.push_str("\n\n");
199 }
200 let _ = write!(buf, "{}. {s}", i + 1);
201 }
202 buf
203 };
204
205 if deps.structured_summaries {
206 let anchored_prompt = format!(
207 "<analysis>\n\
208 Merge these partial conversation summaries into a single structured summary.\n\
209 </analysis>\n\
210 \n\
211 Produce a JSON object with exactly these 5 fields:\n\
212 - session_intent: string — what the user is trying to accomplish\n\
213 - files_modified: string[] — file paths, function names, structs touched\n\
214 - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
215 - open_questions: string[] — unresolved questions or blockers\n\
216 - next_steps: string[] — concrete next actions\n\
217 \n\
218 Partial summaries:\n{numbered}"
219 );
220 let anchored_msgs = [Message {
221 role: Role::User,
222 content: anchored_prompt,
223 parts: vec![],
224 metadata: MessageMetadata::default(),
225 }];
226 match tokio::time::timeout(
227 timeout,
228 deps.provider
229 .chat_typed_erased::<AnchoredSummary>(&anchored_msgs),
230 )
231 .await
232 {
233 Ok(Ok(anchored)) if anchored.is_complete() => {
234 if let Some(ref cb) = deps.on_anchored_summary {
235 cb(&anchored, false);
236 }
237 return Ok(crate::slot::cap_summary(anchored.to_markdown(), 16_000));
238 }
239 Ok(Ok(anchored)) => {
240 tracing::warn!(
241 "chunked consolidation: structured summary incomplete, falling back to prose"
242 );
243 if let Some(ref cb) = deps.on_anchored_summary {
244 cb(&anchored, true);
245 }
246 }
247 Ok(Err(e)) => {
248 tracing::warn!(error = %e, "chunked consolidation: structured output failed, falling back to prose");
249 }
250 Err(_) => {
251 tracing::warn!(
252 "chunked consolidation: structured output timed out, falling back to prose"
253 );
254 }
255 }
256 }
257
258 let consolidation_prompt = format!(
259 "<analysis>\n\
260 Merge these partial conversation summaries into a single structured compaction note.\n\
261 Produce exactly these 9 sections covering all partial summaries:\n\
262 1. User Intent\n2. Technical Concepts\n3. Files & Code\n4. Errors & Fixes\n\
263 5. Problem Solving\n6. User Messages\n7. Pending Tasks\n8. Current Work\n9. Next Step\n\
264 </analysis>\n\n\
265 Partial summaries:\n{numbered}"
266 );
267
268 let consolidation_msgs = [Message {
269 role: Role::User,
270 content: consolidation_prompt,
271 parts: vec![],
272 metadata: MessageMetadata::default(),
273 }];
274 tokio::time::timeout(timeout, deps.provider.chat(&consolidation_msgs))
275 .await
276 .map_err(|_| zeph_llm::LlmError::Timeout)?
277}
278
279#[must_use]
292pub fn build_chunk_prompt(messages: &[Message], guidelines: &str) -> String {
293 let history_text = format_history(messages);
294
295 let guidelines_section = guidelines_xml(guidelines);
296
297 format!(
298 "<analysis>\n\
299 Analyze this conversation and produce a structured compaction note for self-consumption.\n\
300 This note replaces the original messages in your context window — be thorough.\n\
301 Longer is better if it preserves actionable detail.\n\
302 </analysis>\n\
303 {guidelines_section}\n\
304 Produce exactly these 9 sections:\n\
305 1. User Intent — what the user is ultimately trying to accomplish\n\
306 2. Technical Concepts — key technologies, patterns, constraints discussed\n\
307 3. Files & Code — file paths, function names, structs, enums touched or relevant\n\
308 4. Errors & Fixes — every error encountered and whether/how it was resolved\n\
309 5. Problem Solving — approaches tried, decisions made, alternatives rejected\n\
310 6. User Messages — verbatim user requests that are still pending or relevant\n\
311 7. Pending Tasks — items explicitly promised or left TODO\n\
312 8. Current Work — the exact task in progress at the moment of compaction\n\
313 9. Next Step — the single most important action to take immediately after compaction\n\
314 \n\
315 Conversation:\n{history_text}"
316 )
317}
318
319#[must_use]
332pub fn build_anchored_summary_prompt(messages: &[Message], guidelines: &str) -> String {
333 let history_text = format_history(messages);
334 let guidelines_section = guidelines_xml(guidelines);
335
336 format!(
337 "<analysis>\n\
338 You are compacting a conversation into a structured summary for self-consumption.\n\
339 This summary replaces the original messages in your context window.\n\
340 Every field MUST be populated — empty fields mean lost information.\n\
341 </analysis>\n\
342 {guidelines_section}\n\
343 Produce a JSON object with exactly these 5 fields:\n\
344 - session_intent: string — what the user is trying to accomplish\n\
345 - files_modified: string[] — file paths, function names, structs touched\n\
346 - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
347 - open_questions: string[] — unresolved questions or blockers\n\
348 - next_steps: string[] — concrete next actions\n\
349 \n\
350 Be thorough. Preserve all file paths, line numbers, error messages, \
351 and specific identifiers — they cannot be recovered.\n\
352 \n\
353 Conversation:\n{history_text}"
354 )
355}
356
357#[must_use]
362pub fn build_metadata_summary(messages: &[Message], truncate: fn(&str, usize) -> String) -> String {
363 let mut user_count = 0usize;
364 let mut assistant_count = 0usize;
365 let mut system_count = 0usize;
366 let mut last_user = String::new();
367 let mut last_assistant = String::new();
368
369 for m in messages {
370 match m.role {
371 Role::User => {
372 user_count += 1;
373 if !m.content.is_empty() {
374 last_user.clone_from(&m.content);
375 }
376 }
377 Role::Assistant => {
378 assistant_count += 1;
379 if !m.content.is_empty() {
380 last_assistant.clone_from(&m.content);
381 }
382 }
383 Role::System => system_count += 1,
384 }
385 }
386
387 let last_user_preview = truncate(&last_user, 200);
388 let last_assistant_preview = truncate(&last_assistant, 200);
389
390 format!(
391 "[metadata summary — LLM compaction unavailable]\n\
392 Messages compacted: {} ({} user, {} assistant, {} system)\n\
393 Last user message: {last_user_preview}\n\
394 Last assistant message: {last_assistant_preview}",
395 messages.len(),
396 user_count,
397 assistant_count,
398 system_count,
399 )
400}
401
402#[must_use]
406pub fn build_tool_pair_summary_prompt(req: &Message, res: &Message) -> String {
407 format!(
408 "Produce a concise but technically precise summary of this tool invocation.\n\
409 Preserve all facts that would be needed to continue work without re-running the tool:\n\
410 - Tool name and key input parameters (file paths, function names, patterns, line ranges)\n\
411 - Exact findings: line numbers, struct/enum/function names, error messages, numeric values\n\
412 - Outcome: what was found, changed, created, or confirmed\n\
413 Do NOT omit specific identifiers, paths, or numbers — they cannot be recovered later.\n\
414 Use 2-4 sentences maximum.\n\n\
415 <tool_request>\n{}\n</tool_request>\n\n<tool_response>\n{}\n</tool_response>",
416 req.content, res.content
417 )
418}
419
/// Prune tool-output payloads from the middle of the conversation outward
/// until `fraction` of the tool-bearing messages have been compacted.
///
/// Only messages containing a `ToolResult` or `ToolOutput` part are
/// candidates. Pruned bodies are replaced with a short notice; when the body
/// embeds an overflow-store reference, the notice points at `read_overflow`
/// so the full output remains retrievable.
#[allow(
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap
)]
#[must_use]
pub fn remove_tool_responses_middle_out(mut messages: Vec<Message>, fraction: f32) -> Vec<Message> {
    // Indices (into `messages`) of every message carrying at least one tool part.
    let tool_indices: Vec<usize> = messages
        .iter()
        .enumerate()
        .filter(|(_, m)| {
            m.parts.iter().any(|p| {
                matches!(
                    p,
                    MessagePart::ToolResult { .. } | MessagePart::ToolOutput { .. }
                )
            })
        })
        .map(|(i, _)| i)
        .collect();

    if tool_indices.is_empty() {
        return messages;
    }

    let n = tool_indices.len();
    // ceil(n * fraction), clamped so we never select more than n entries.
    let to_remove = ((n as f32 * fraction).ceil() as usize).min(n);

    // Select indices by walking outward from the center of `tool_indices`:
    // each iteration takes one from the right side, then one from the left.
    let center = n / 2;
    let mut remove_set: Vec<usize> = Vec::with_capacity(to_remove);
    let mut left = center as isize - 1;
    let mut right = center;
    let mut count = 0;

    while count < to_remove {
        if right < n {
            remove_set.push(tool_indices[right]);
            count += 1;
            right += 1;
        }
        if count < to_remove && left >= 0 {
            let idx = left as usize;
            // Defensive: left and right walk disjoint halves, so this
            // duplicate check should never trigger for distinct indices.
            if !remove_set.contains(&tool_indices[idx]) {
                remove_set.push(tool_indices[idx]);
                count += 1;
            }
        }
        left -= 1;
        // Both directions exhausted — nothing more to select.
        if left < 0 && right >= n {
            break;
        }
    }

    for &msg_idx in &remove_set {
        let msg = &mut messages[msg_idx];
        for part in &mut msg.parts {
            match part {
                MessagePart::ToolResult { content, .. } => {
                    // Keep a retrieval pointer when the content embeds an
                    // overflow-store ID; otherwise mark it plainly compacted.
                    let ref_notice = extract_overflow_ref(content).map_or_else(
                        || String::from("[compacted]"),
                        |uuid| {
                            format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
                        },
                    );
                    *content = ref_notice;
                }
                MessagePart::ToolOutput {
                    body, compacted_at, ..
                } => {
                    // Skip parts already compacted by an earlier pass.
                    if compacted_at.is_none() {
                        // NOTE(review): unlike the ToolResult arm above, a body
                        // with no overflow ref becomes empty rather than
                        // "[compacted]" — presumably fine since `compacted_at`
                        // marks the pruning; confirm this asymmetry is intended.
                        let ref_notice = extract_overflow_ref(body)
                            .map(|uuid| {
                                format!(
                                    "[tool output pruned; use read_overflow {uuid} to retrieve]"
                                )
                            })
                            .unwrap_or_default();
                        *body = ref_notice;
                        // Record the compaction time as unix seconds (signed field).
                        *compacted_at = Some(
                            std::time::SystemTime::now()
                                .duration_since(std::time::UNIX_EPOCH)
                                .unwrap_or_default()
                                .as_secs()
                                .cast_signed(),
                        );
                    }
                }
                _ => {}
            }
        }
        // Presumably re-derives the flattened `content` field from the edited
        // parts — confirm against Message::rebuild_content.
        msg.rebuild_content();
    }
    messages
}
522
523#[must_use]
530pub fn extract_overflow_ref(body: &str) -> Option<&str> {
531 let start = body.find(OVERFLOW_NOTICE_PREFIX)?;
532 let rest = &body[start + OVERFLOW_NOTICE_PREFIX.len()..];
533 let end = rest.find(" \u{2014} ")?;
534 Some(&rest[..end])
535}
536
537fn format_history(messages: &[Message]) -> String {
538 let estimated_len: usize = messages
539 .iter()
540 .map(|m| "[assistant]: ".len() + m.content.len() + 2)
541 .sum();
542 let mut history_text = String::with_capacity(estimated_len);
543 for (i, m) in messages.iter().enumerate() {
544 if i > 0 {
545 history_text.push_str("\n\n");
546 }
547 let role = match m.role {
548 Role::User => "user",
549 Role::Assistant => "assistant",
550 Role::System => "system",
551 };
552 let _ = write!(history_text, "[{role}]: {}", m.content);
553 }
554 history_text
555}
556
/// Wrap non-empty guidelines in a `<compression-guidelines>` XML block.
///
/// Empty guidelines yield an empty string so prompts omit the section
/// entirely.
fn guidelines_xml(guidelines: &str) -> String {
    if guidelines.is_empty() {
        return String::new();
    }
    format!("\n<compression-guidelines>\n{guidelines}\n</compression-guidelines>\n")
}
564
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{Message, MessageMetadata, Role};

    // Minimal user-role message with plain text content and no tool parts.
    fn user_msg(content: &str) -> Message {
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![],
            metadata: MessageMetadata::default(),
        }
    }

    // Minimal assistant-role message with plain text content and no tool parts.
    fn assistant_msg(content: &str) -> Message {
        Message {
            role: Role::Assistant,
            content: content.to_string(),
            parts: vec![],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn build_chunk_prompt_includes_guidelines_section() {
        let msgs = [user_msg("hello")];
        let prompt = build_chunk_prompt(&msgs, "be concise");
        assert!(
            prompt.contains("<compression-guidelines>"),
            "prompt must include guidelines XML"
        );
        assert!(
            prompt.contains("be concise"),
            "prompt must embed the guidelines text"
        );
    }

    #[test]
    fn build_chunk_prompt_no_guidelines_omits_section() {
        let prompt = build_chunk_prompt(&[], "");
        assert!(
            !prompt.contains("<compression-guidelines>"),
            "empty guidelines must not produce the XML section"
        );
    }

    #[test]
    fn build_anchored_summary_prompt_contains_json_fields() {
        let prompt = build_anchored_summary_prompt(&[], "");
        assert!(prompt.contains("session_intent"));
        assert!(prompt.contains("files_modified"));
        assert!(prompt.contains("next_steps"));
    }

    #[test]
    fn build_metadata_summary_counts_messages() {
        let msgs = [user_msg("hi"), assistant_msg("hello"), user_msg("bye")];
        let summary = build_metadata_summary(&msgs, |s, n| s.chars().take(n).collect());
        assert!(summary.contains("3 (2 user, 1 assistant, 0 system)"));
    }

    #[test]
    fn build_tool_pair_summary_prompt_contains_request_and_response() {
        let req = user_msg("req content");
        let res = assistant_msg("res content");
        let prompt = build_tool_pair_summary_prompt(&req, &res);
        assert!(prompt.contains("req content"));
        assert!(prompt.contains("res content"));
    }

    #[test]
    fn extract_overflow_ref_returns_uuid_when_present() {
        let uuid = "550e8400-e29b-41d4-a716-446655440000";
        // Body mimics an overflow notice: prefix, ID, em-dash separator.
        let body =
            format!("some output\n[full output stored \u{2014} ID: {uuid} \u{2014} 12345 bytes]");
        assert_eq!(extract_overflow_ref(&body), Some(uuid));
    }

    #[test]
    fn extract_overflow_ref_returns_none_when_absent() {
        assert_eq!(extract_overflow_ref("normal output"), None);
    }

    // Message carrying a ToolUse/ToolResult pair, as produced after a tool call.
    fn tool_result_msg(content: &str) -> Message {
        use zeph_llm::provider::MessagePart;
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![
                MessagePart::ToolUse {
                    id: "t1".into(),
                    name: "bash".into(),
                    input: serde_json::Value::Null,
                },
                MessagePart::ToolResult {
                    tool_use_id: "t1".into(),
                    content: content.to_string(),
                    is_error: false,
                },
            ],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn remove_tool_responses_middle_out_clears_correct_fraction() {
        let mut messages = vec![
            tool_result_msg("out0"),
            tool_result_msg("out1"),
            tool_result_msg("out2"),
            tool_result_msg("out3"),
        ];
        messages = remove_tool_responses_middle_out(messages, 0.5);

        // With n=4 and fraction=0.5, the two middle messages are pruned.
        let compacted_count = messages
            .iter()
            .flat_map(|m| m.parts.iter())
            .filter(|p| {
                if let zeph_llm::provider::MessagePart::ToolResult { content, .. } = p {
                    content == "[compacted]"
                } else {
                    false
                }
            })
            .count();

        assert_eq!(
            compacted_count, 2,
            "ceil(4 * 0.5) = 2 tool results must be replaced with [compacted]"
        );
    }

    #[test]
    fn remove_tool_responses_middle_out_no_tool_messages_returns_unchanged() {
        let messages = vec![user_msg("hello"), assistant_msg("hi")];
        let result = remove_tool_responses_middle_out(messages.clone(), 0.5);
        assert_eq!(result.len(), messages.len());
        assert!(
            result.iter().all(|m| m.parts.is_empty()),
            "non-tool messages must be unchanged"
        );
    }
}