Skip to main content

zeph_context/
summarization.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pure prompt-building, compaction helpers, and async LLM summarization for context.
5//!
6//! Stateless functions take only `Message` slices and configuration values; they contain
7//! no agent state access. The `SummarizationDeps` struct provides explicit LLM dependencies
8//! for the async summarization functions, avoiding coupling to `Agent<C>`.
9//!
10//! The orchestration layer (`Agent::compact_context`, `Agent::maybe_compact`, etc.)
11//! lives in `zeph-core` and calls these helpers.
12
13use std::fmt::Write as _;
14use std::sync::Arc;
15use std::time::Duration;
16
17use futures::StreamExt as _;
18use zeph_common::OVERFLOW_NOTICE_PREFIX;
19use zeph_common::memory::AnchoredSummary;
20use zeph_llm::LlmProvider as _;
21use zeph_llm::any::AnyProvider;
22use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
23
24/// Token counting for individual messages, used by the summarization chunker.
25///
26/// Implemented by `zeph-memory::TokenCounter` in `zeph-core`. Defined here so
27/// `zeph-context` does not need a direct dependency on `zeph-memory`.
28pub trait MessageTokenCounter: Send + Sync {
29    /// Return the token count for the given message, accounting for all parts.
30    fn count_message_tokens(&self, msg: &Message) -> usize;
31}
32
33/// Explicit LLM dependencies for async summarization, avoiding coupling to `Agent<C>`.
34///
35/// Passed to [`single_pass_summary`], [`summarize_with_llm`], and [`summarize_structured`]
36/// so these functions can be called from `zeph-context` without depending on `zeph-core`.
37#[derive(Clone)]
38pub struct SummarizationDeps {
39    /// LLM provider used for all summarization calls.
40    pub provider: AnyProvider,
41    /// Timeout applied to each individual LLM call.
42    pub llm_timeout: Duration,
43    /// Token counter for chunking message slices.
44    pub token_counter: Arc<dyn MessageTokenCounter>,
45    /// Whether to attempt structured `AnchoredSummary` output before prose.
46    pub structured_summaries: bool,
47    /// Optional callback invoked with the `AnchoredSummary` result and a `fallback` flag.
48    ///
49    /// Used by `zeph-core` to write debug dumps without the `SummarizationDeps` knowing
50    /// about `DebugDumper`. Pass `None` when debug dumps are not needed.
51    #[allow(clippy::type_complexity)]
52    pub on_anchored_summary: Option<Arc<dyn Fn(&AnchoredSummary, bool) + Send + Sync>>,
53}
54
55/// Attempt structured summarization via `chat_typed_erased::<AnchoredSummary>()`.
56///
57/// Returns `Ok(AnchoredSummary)` on success, `Err` when mandatory fields are missing
58/// or the LLM fails. The caller is responsible for falling back to prose on `Err`.
59///
60/// # Errors
61/// Returns [`zeph_llm::LlmError`] when the LLM call fails, times out, or the
62/// returned summary is incomplete.
63pub async fn summarize_structured(
64    deps: &SummarizationDeps,
65    messages: &[Message],
66    guidelines: &str,
67) -> Result<AnchoredSummary, zeph_llm::LlmError> {
68    let prompt = build_anchored_summary_prompt(messages, guidelines);
69    let msgs = [Message {
70        role: Role::User,
71        content: prompt,
72        parts: vec![],
73        metadata: MessageMetadata::default(),
74    }];
75    let summary: AnchoredSummary = tokio::time::timeout(
76        deps.llm_timeout,
77        deps.provider.chat_typed_erased::<AnchoredSummary>(&msgs),
78    )
79    .await
80    .map_err(|_| zeph_llm::LlmError::Timeout)??;
81
82    if !summary.files_modified.is_empty() && summary.decisions_made.is_empty() {
83        tracing::warn!("structured summary: decisions_made is empty");
84    } else if summary.files_modified.is_empty() {
85        tracing::warn!(
86            "structured summary: files_modified is empty (may be a pure discussion session)"
87        );
88    }
89
90    if !summary.is_complete() {
91        tracing::warn!(
92            session_intent_empty = summary.session_intent.trim().is_empty(),
93            next_steps_empty = summary.next_steps.is_empty(),
94            "structured summary incomplete: mandatory fields missing, falling back to prose"
95        );
96        return Err(zeph_llm::LlmError::StructuredParse(
97            "structured summary missing mandatory fields".into(),
98        ));
99    }
100
101    if let Err(msg) = summary.validate() {
102        tracing::warn!(
103            error = %msg,
104            "structured summary failed field validation, falling back to prose"
105        );
106        return Err(zeph_llm::LlmError::StructuredParse(msg));
107    }
108
109    Ok(summary)
110}
111
112/// Single-pass LLM summarization over a message slice.
113///
114/// # Errors
115/// Returns [`zeph_llm::LlmError`] when the LLM call fails or times out.
116pub async fn single_pass_summary(
117    deps: &SummarizationDeps,
118    messages: &[Message],
119    guidelines: &str,
120) -> Result<String, zeph_llm::LlmError> {
121    let prompt = build_chunk_prompt(messages, guidelines);
122    let msgs = [Message {
123        role: Role::User,
124        content: prompt,
125        parts: vec![],
126        metadata: MessageMetadata::default(),
127    }];
128    tokio::time::timeout(deps.llm_timeout, deps.provider.chat(&msgs))
129        .await
130        .map_err(|_| zeph_llm::LlmError::Timeout)?
131}
132
133/// Chunked multi-pass LLM summarization with bounded concurrency.
134///
135/// Splits `messages` into token-bounded chunks and summarizes each chunk with the LLM.
136/// Partial results are consolidated into a final summary. Falls back to single-pass on
137/// chunk failures or context length errors.
138///
139/// # Errors
140/// Returns [`zeph_llm::LlmError`] when all summarization attempts fail.
141pub async fn summarize_with_llm(
142    deps: &SummarizationDeps,
143    messages: &[Message],
144    guidelines: &str,
145) -> Result<String, zeph_llm::LlmError> {
146    const CHUNK_TOKEN_BUDGET: usize = 4096;
147    const OVERSIZED_THRESHOLD: usize = CHUNK_TOKEN_BUDGET / 2;
148
149    let tc = Arc::clone(&deps.token_counter);
150    let chunks = crate::slot::chunk_messages(
151        messages,
152        CHUNK_TOKEN_BUDGET,
153        OVERSIZED_THRESHOLD,
154        move |msg| tc.count_message_tokens(msg),
155    );
156
157    if chunks.len() <= 1 {
158        return single_pass_summary(deps, messages, guidelines).await;
159    }
160
161    let partial_summaries = run_chunk_summaries(deps, chunks, guidelines).await;
162
163    if partial_summaries.is_empty() {
164        return single_pass_summary(deps, messages, guidelines).await;
165    }
166
167    let numbered = join_partial_summaries(&partial_summaries);
168
169    if deps.structured_summaries
170        && let Some(result) = try_structured_consolidation(deps, &numbered).await
171    {
172        return Ok(result);
173    }
174
175    prose_consolidation(deps, &numbered).await
176}
177
178/// Summarizes each chunk concurrently with bounded parallelism. Any chunk error discards all
179/// partials and forces single-pass fallback.
180async fn run_chunk_summaries(
181    deps: &SummarizationDeps,
182    chunks: Vec<Vec<Message>>,
183    guidelines: &str,
184) -> Vec<String> {
185    let provider = deps.provider.clone();
186    let guidelines_owned = guidelines.to_string();
187    let timeout = deps.llm_timeout;
188
189    let results: Vec<_> = futures::stream::iter(chunks.into_iter().map(|chunk| {
190        let guidelines_ref = guidelines_owned.clone();
191        let prompt = build_chunk_prompt(&chunk, &guidelines_ref);
192        let p = provider.clone();
193        async move {
194            tokio::time::timeout(
195                timeout,
196                p.chat(&[Message {
197                    role: Role::User,
198                    content: prompt,
199                    parts: vec![],
200                    metadata: MessageMetadata::default(),
201                }]),
202            )
203            .await
204            .map_err(|_| zeph_llm::LlmError::Timeout)?
205        }
206    }))
207    .buffer_unordered(4)
208    .collect()
209    .await;
210
211    results
212        .into_iter()
213        .collect::<Result<Vec<_>, zeph_llm::LlmError>>()
214        .unwrap_or_else(|e| {
215            tracing::warn!(
216                "chunked compaction: one or more chunks failed: {e:#}, falling back to single-pass"
217            );
218            Vec::new()
219        })
220}
221
/// Render partial summaries as a numbered list (`1. …`, `2. …`), with entries separated
/// by a blank line. An empty slice yields an empty string.
fn join_partial_summaries(partials: &[String]) -> String {
    // Rough per-entry overhead for the ordinal, ". " and the separator.
    let capacity: usize = partials.iter().map(|partial| partial.len() + 8).sum();
    let mut joined = String::with_capacity(capacity);

    for (ordinal, partial) in (1usize..).zip(partials) {
        if ordinal != 1 {
            joined.push_str("\n\n");
        }
        joined.push_str(&ordinal.to_string());
        joined.push_str(". ");
        joined.push_str(partial);
    }

    joined
}
233
234async fn try_structured_consolidation(deps: &SummarizationDeps, numbered: &str) -> Option<String> {
235    let timeout = deps.llm_timeout;
236    let anchored_prompt = format!(
237        "<analysis>\n\
238         Merge these partial conversation summaries into a single structured summary.\n\
239         </analysis>\n\
240         \n\
241         Produce a JSON object with exactly these 5 fields:\n\
242         - session_intent: string — what the user is trying to accomplish\n\
243         - files_modified: string[] — file paths, function names, structs touched\n\
244         - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
245         - open_questions: string[] — unresolved questions or blockers\n\
246         - next_steps: string[] — concrete next actions\n\
247         \n\
248         Partial summaries:\n{numbered}"
249    );
250    let anchored_msgs = [Message {
251        role: Role::User,
252        content: anchored_prompt,
253        parts: vec![],
254        metadata: MessageMetadata::default(),
255    }];
256    match tokio::time::timeout(
257        timeout,
258        deps.provider
259            .chat_typed_erased::<AnchoredSummary>(&anchored_msgs),
260    )
261    .await
262    {
263        Ok(Ok(anchored)) if anchored.is_complete() => {
264            if let Some(ref cb) = deps.on_anchored_summary {
265                cb(&anchored, false);
266            }
267            Some(crate::slot::cap_summary(anchored.to_markdown(), 16_000))
268        }
269        Ok(Ok(anchored)) => {
270            tracing::warn!(
271                "chunked consolidation: structured summary incomplete, falling back to prose"
272            );
273            if let Some(ref cb) = deps.on_anchored_summary {
274                cb(&anchored, true);
275            }
276            None
277        }
278        Ok(Err(e)) => {
279            tracing::warn!(error = %e, "chunked consolidation: structured output failed, falling back to prose");
280            None
281        }
282        Err(_) => {
283            tracing::warn!(
284                "chunked consolidation: structured output timed out, falling back to prose"
285            );
286            None
287        }
288    }
289}
290
291async fn prose_consolidation(
292    deps: &SummarizationDeps,
293    numbered: &str,
294) -> Result<String, zeph_llm::LlmError> {
295    let timeout = deps.llm_timeout;
296    let consolidation_prompt = format!(
297        "<analysis>\n\
298         Merge these partial conversation summaries into a single structured compaction note.\n\
299         Produce exactly these 9 sections covering all partial summaries:\n\
300         1. User Intent\n2. Technical Concepts\n3. Files & Code\n4. Errors & Fixes\n\
301         5. Problem Solving\n6. User Messages\n7. Pending Tasks\n8. Current Work\n9. Next Step\n\
302         </analysis>\n\n\
303         Partial summaries:\n{numbered}"
304    );
305    let consolidation_msgs = [Message {
306        role: Role::User,
307        content: consolidation_prompt,
308        parts: vec![],
309        metadata: MessageMetadata::default(),
310    }];
311    tokio::time::timeout(timeout, deps.provider.chat(&consolidation_msgs))
312        .await
313        .map_err(|_| zeph_llm::LlmError::Timeout)?
314}
315
316/// Build a prose summarization prompt from a message slice and optional guidelines.
317///
318/// The returned string is suitable for sending to an LLM as a `User` message.
319/// Guidelines are injected inside `<compression-guidelines>` XML tags when non-empty.
320///
321/// # Examples
322///
323/// ```no_run
324/// use zeph_context::summarization::build_chunk_prompt;
325/// let prompt = build_chunk_prompt(&[], "be concise");
326/// assert!(prompt.contains("compression-guidelines"));
327/// ```
328#[must_use]
329pub fn build_chunk_prompt(messages: &[Message], guidelines: &str) -> String {
330    let history_text = format_history(messages);
331
332    let guidelines_section = guidelines_xml(guidelines);
333
334    format!(
335        "<analysis>\n\
336         Analyze this conversation and produce a structured compaction note for self-consumption.\n\
337         This note replaces the original messages in your context window — be thorough.\n\
338         Longer is better if it preserves actionable detail.\n\
339         </analysis>\n\
340         {guidelines_section}\n\
341         Produce exactly these 9 sections:\n\
342         1. User Intent — what the user is ultimately trying to accomplish\n\
343         2. Technical Concepts — key technologies, patterns, constraints discussed\n\
344         3. Files & Code — file paths, function names, structs, enums touched or relevant\n\
345         4. Errors & Fixes — every error encountered and whether/how it was resolved\n\
346         5. Problem Solving — approaches tried, decisions made, alternatives rejected\n\
347         6. User Messages — verbatim user requests that are still pending or relevant\n\
348         7. Pending Tasks — items explicitly promised or left TODO\n\
349         8. Current Work — the exact task in progress at the moment of compaction\n\
350         9. Next Step — the single most important action to take immediately after compaction\n\
351         \n\
352         Conversation:\n{history_text}"
353    )
354}
355
356/// Build a structured JSON summarization prompt for `AnchoredSummary` output.
357///
358/// The returned string is suitable for sending to an LLM as a `User` message.
359/// Guidelines are injected inside `<compression-guidelines>` XML tags when non-empty.
360///
361/// # Examples
362///
363/// ```no_run
364/// use zeph_context::summarization::build_anchored_summary_prompt;
365/// let prompt = build_anchored_summary_prompt(&[], "");
366/// assert!(prompt.contains("session_intent"));
367/// ```
368#[must_use]
369pub fn build_anchored_summary_prompt(messages: &[Message], guidelines: &str) -> String {
370    let history_text = format_history(messages);
371    let guidelines_section = guidelines_xml(guidelines);
372
373    format!(
374        "<analysis>\n\
375         You are compacting a conversation into a structured summary for self-consumption.\n\
376         This summary replaces the original messages in your context window.\n\
377         Every field MUST be populated — empty fields mean lost information.\n\
378         </analysis>\n\
379         {guidelines_section}\n\
380         Produce a JSON object with exactly these 5 fields:\n\
381         - session_intent: string — what the user is trying to accomplish\n\
382         - files_modified: string[] — file paths, function names, structs touched\n\
383         - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
384         - open_questions: string[] — unresolved questions or blockers\n\
385         - next_steps: string[] — concrete next actions\n\
386         \n\
387         Be thorough. Preserve all file paths, line numbers, error messages, \
388         and specific identifiers — they cannot be recovered.\n\
389         \n\
390         Conversation:\n{history_text}"
391    )
392}
393
394/// Build a last-resort metadata summary without calling the LLM.
395///
396/// Used when LLM summarization repeatedly fails. The result records message counts
397/// and truncated previews of the last user and assistant messages.
398#[must_use]
399pub fn build_metadata_summary(messages: &[Message], truncate: fn(&str, usize) -> String) -> String {
400    let mut user_count = 0usize;
401    let mut assistant_count = 0usize;
402    let mut system_count = 0usize;
403    let mut last_user = String::new();
404    let mut last_assistant = String::new();
405
406    for m in messages {
407        match m.role {
408            Role::User => {
409                user_count += 1;
410                if !m.content.is_empty() {
411                    last_user.clone_from(&m.content);
412                }
413            }
414            Role::Assistant => {
415                assistant_count += 1;
416                if !m.content.is_empty() {
417                    last_assistant.clone_from(&m.content);
418                }
419            }
420            Role::System => system_count += 1,
421        }
422    }
423
424    let last_user_preview = truncate(&last_user, 200);
425    let last_assistant_preview = truncate(&last_assistant, 200);
426
427    format!(
428        "[metadata summary — LLM compaction unavailable]\n\
429         Messages compacted: {} ({} user, {} assistant, {} system)\n\
430         Last user message: {last_user_preview}\n\
431         Last assistant message: {last_assistant_preview}",
432        messages.len(),
433        user_count,
434        assistant_count,
435        system_count,
436    )
437}
438
439/// Build a summarization prompt for a single tool-call pair.
440///
441/// The returned string is suitable for sending to an LLM as a `User` message.
442#[must_use]
443pub fn build_tool_pair_summary_prompt(req: &Message, res: &Message) -> String {
444    format!(
445        "Produce a concise but technically precise summary of this tool invocation.\n\
446         Preserve all facts that would be needed to continue work without re-running the tool:\n\
447         - Tool name and key input parameters (file paths, function names, patterns, line ranges)\n\
448         - Exact findings: line numbers, struct/enum/function names, error messages, numeric values\n\
449         - Outcome: what was found, changed, created, or confirmed\n\
450         Do NOT omit specific identifiers, paths, or numbers — they cannot be recovered later.\n\
451         Use 2-4 sentences maximum.\n\n\
452         <tool_request>\n{}\n</tool_request>\n\n<tool_response>\n{}\n</tool_response>",
453        req.content, res.content
454    )
455}
456
457/// Remove a fraction of tool-response messages from a conversation using a middle-out strategy.
458///
459/// `fraction` is in range `(0.0, 1.0]` — fraction of tool responses to replace with compact
460/// references. Tool outputs that have an overflow UUID are replaced with a `read_overflow`
461/// hint; others become `[compacted]`.
462///
463/// Returns the modified message list.
464#[allow(
465    clippy::cast_precision_loss,
466    clippy::cast_possible_truncation,
467    clippy::cast_sign_loss,
468    clippy::cast_possible_wrap
469)]
470#[must_use]
471pub fn remove_tool_responses_middle_out(mut messages: Vec<Message>, fraction: f32) -> Vec<Message> {
472    let tool_indices: Vec<usize> = messages
473        .iter()
474        .enumerate()
475        .filter(|(_, m)| {
476            m.parts.iter().any(|p| {
477                matches!(
478                    p,
479                    MessagePart::ToolResult { .. } | MessagePart::ToolOutput { .. }
480                )
481            })
482        })
483        .map(|(i, _)| i)
484        .collect();
485
486    if tool_indices.is_empty() {
487        return messages;
488    }
489
490    let n = tool_indices.len();
491    let to_remove = ((n as f32 * fraction).ceil() as usize).min(n);
492
493    let center = n / 2;
494    let mut remove_set: Vec<usize> = Vec::with_capacity(to_remove);
495    let mut left = center as isize - 1;
496    let mut right = center;
497    let mut count = 0;
498
499    while count < to_remove {
500        if right < n {
501            remove_set.push(tool_indices[right]);
502            count += 1;
503            right += 1;
504        }
505        if count < to_remove && left >= 0 {
506            let idx = left as usize;
507            if !remove_set.contains(&tool_indices[idx]) {
508                remove_set.push(tool_indices[idx]);
509                count += 1;
510            }
511        }
512        left -= 1;
513        if left < 0 && right >= n {
514            break;
515        }
516    }
517
518    for &msg_idx in &remove_set {
519        let msg = &mut messages[msg_idx];
520        for part in &mut msg.parts {
521            match part {
522                MessagePart::ToolResult { content, .. } => {
523                    let ref_notice = extract_overflow_ref(content).map_or_else(
524                        || String::from("[compacted]"),
525                        |uuid| {
526                            format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
527                        },
528                    );
529                    *content = ref_notice;
530                }
531                MessagePart::ToolOutput {
532                    body, compacted_at, ..
533                } if compacted_at.is_none() => {
534                    let ref_notice = extract_overflow_ref(body)
535                        .map(|uuid| {
536                            format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
537                        })
538                        .unwrap_or_default();
539                    *body = ref_notice;
540                    *compacted_at = Some(
541                        std::time::SystemTime::now()
542                            .duration_since(std::time::UNIX_EPOCH)
543                            .unwrap_or_default()
544                            .as_secs()
545                            .cast_signed(),
546                    );
547                }
548                _ => {}
549            }
550        }
551        msg.rebuild_content();
552    }
553    messages
554}
555
556/// Extract the overflow UUID from a tool output body, if present.
557///
558/// The overflow notice has the format:
559/// `\n[full output stored — ID: {uuid} — {bytes} bytes, use read_overflow tool to retrieve]`
560///
561/// Returns the UUID substring on success, or `None` if the notice is absent.
562#[must_use]
563pub fn extract_overflow_ref(body: &str) -> Option<&str> {
564    let start = body.find(OVERFLOW_NOTICE_PREFIX)?;
565    let rest = &body[start + OVERFLOW_NOTICE_PREFIX.len()..];
566    let end = rest.find(" \u{2014} ")?;
567    Some(&rest[..end])
568}
569
570fn format_history(messages: &[Message]) -> String {
571    let estimated_len: usize = messages
572        .iter()
573        .map(|m| "[assistant]: ".len() + m.content.len() + 2)
574        .sum();
575    let mut history_text = String::with_capacity(estimated_len);
576    for (i, m) in messages.iter().enumerate() {
577        if i > 0 {
578            history_text.push_str("\n\n");
579        }
580        let role = match m.role {
581            Role::User => "user",
582            Role::Assistant => "assistant",
583            Role::System => "system",
584        };
585        let _ = write!(history_text, "[{role}]: {}", m.content);
586    }
587    history_text
588}
589
/// Wrap `guidelines` in a `<compression-guidelines>` XML element surrounded by newlines.
///
/// Empty guidelines produce an empty string so that the section is left out of the prompt.
fn guidelines_xml(guidelines: &str) -> String {
    if guidelines.is_empty() {
        return String::new();
    }

    [
        "\n<compression-guidelines>\n",
        guidelines,
        "\n</compression-guidelines>\n",
    ]
    .concat()
}
597
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};

    /// Builds a message without parts for the given role and text.
    fn text_msg(role: Role, content: &str) -> Message {
        Message {
            role,
            content: content.to_owned(),
            parts: Vec::new(),
            metadata: MessageMetadata::default(),
        }
    }

    /// Part-less `User` message.
    fn user_msg(content: &str) -> Message {
        text_msg(Role::User, content)
    }

    /// Part-less `Assistant` message.
    fn assistant_msg(content: &str) -> Message {
        text_msg(Role::Assistant, content)
    }

    /// `User` message carrying a `ToolUse` part and a matching `ToolResult` part whose
    /// content is `content`.
    fn tool_result_msg(content: &str) -> Message {
        let tool_use = MessagePart::ToolUse {
            id: "t1".into(),
            name: "bash".into(),
            input: serde_json::Value::Null,
        };
        let tool_result = MessagePart::ToolResult {
            tool_use_id: "t1".into(),
            content: content.to_owned(),
            is_error: false,
        };
        Message {
            role: Role::User,
            content: content.to_owned(),
            parts: vec![tool_use, tool_result],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn build_chunk_prompt_includes_guidelines_section() {
        let prompt = build_chunk_prompt(&[user_msg("hello")], "be concise");
        assert!(
            prompt.contains("<compression-guidelines>"),
            "prompt must include guidelines XML"
        );
        assert!(
            prompt.contains("be concise"),
            "prompt must embed the guidelines text"
        );
    }

    #[test]
    fn build_chunk_prompt_no_guidelines_omits_section() {
        let prompt = build_chunk_prompt(&[], "");
        let has_section = prompt.contains("<compression-guidelines>");
        assert!(
            !has_section,
            "empty guidelines must not produce the XML section"
        );
    }

    #[test]
    fn build_anchored_summary_prompt_contains_json_fields() {
        let prompt = build_anchored_summary_prompt(&[], "");
        for field in ["session_intent", "files_modified", "next_steps"] {
            assert!(prompt.contains(field));
        }
    }

    #[test]
    fn build_metadata_summary_counts_messages() {
        let conversation = [user_msg("hi"), assistant_msg("hello"), user_msg("bye")];
        let summary = build_metadata_summary(&conversation, |s, n| s.chars().take(n).collect());
        assert!(summary.contains("3 (2 user, 1 assistant, 0 system)"));
    }

    #[test]
    fn build_tool_pair_summary_prompt_contains_request_and_response() {
        let prompt =
            build_tool_pair_summary_prompt(&user_msg("req content"), &assistant_msg("res content"));
        assert!(prompt.contains("req content"));
        assert!(prompt.contains("res content"));
    }

    #[test]
    fn extract_overflow_ref_returns_uuid_when_present() {
        let uuid = "550e8400-e29b-41d4-a716-446655440000";
        let body =
            format!("some output\n[full output stored \u{2014} ID: {uuid} \u{2014} 12345 bytes]");
        assert_eq!(extract_overflow_ref(&body), Some(uuid));
    }

    #[test]
    fn extract_overflow_ref_returns_none_when_absent() {
        assert_eq!(extract_overflow_ref("normal output"), None);
    }

    #[test]
    fn remove_tool_responses_middle_out_clears_correct_fraction() {
        // 4 tool messages, fraction=0.5 → ceil(4*0.5)=2 must be replaced with [compacted]
        let original: Vec<Message> = ["out0", "out1", "out2", "out3"]
            .into_iter()
            .map(tool_result_msg)
            .collect();
        let pruned = remove_tool_responses_middle_out(original, 0.5);

        let compacted_count = pruned
            .iter()
            .flat_map(|m| &m.parts)
            .filter(|part| {
                matches!(
                    part,
                    MessagePart::ToolResult { content, .. } if content == "[compacted]"
                )
            })
            .count();

        assert_eq!(
            compacted_count, 2,
            "ceil(4 * 0.5) = 2 tool results must be replaced with [compacted]"
        );
    }

    #[test]
    fn remove_tool_responses_middle_out_no_tool_messages_returns_unchanged() {
        let messages = vec![user_msg("hello"), assistant_msg("hi")];
        let expected_len = messages.len();
        let result = remove_tool_responses_middle_out(messages, 0.5);
        assert_eq!(result.len(), expected_len);
        assert!(
            result.iter().all(|m| m.parts.is_empty()),
            "non-tool messages must be unchanged"
        );
    }
}