// zeph_context/summarization.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Pure prompt-building, compaction helpers, and async LLM summarization for context.
5//!
6//! Stateless functions take only `Message` slices and configuration values; they contain
7//! no agent state access. The `SummarizationDeps` struct provides explicit LLM dependencies
8//! for the async summarization functions, avoiding coupling to `Agent<C>`.
9//!
10//! The orchestration layer (`Agent::compact_context`, `Agent::maybe_compact`, etc.)
11//! lives in `zeph-core` and calls these helpers.
12
13use std::fmt::Write as _;
14use std::sync::Arc;
15use std::time::Duration;
16
17use futures::StreamExt as _;
18use zeph_common::OVERFLOW_NOTICE_PREFIX;
19use zeph_llm::LlmProvider as _;
20use zeph_llm::any::AnyProvider;
21use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role};
22use zeph_memory::{AnchoredSummary, TokenCounter};
23
24/// Explicit LLM dependencies for async summarization, avoiding coupling to `Agent<C>`.
25///
26/// Passed to [`single_pass_summary`], [`summarize_with_llm`], and [`summarize_structured`]
27/// so these functions can be called from `zeph-context` without depending on `zeph-core`.
28#[derive(Clone)]
29pub struct SummarizationDeps {
30    /// LLM provider used for all summarization calls.
31    pub provider: AnyProvider,
32    /// Timeout applied to each individual LLM call.
33    pub llm_timeout: Duration,
34    /// Token counter for chunking message slices.
35    pub token_counter: Arc<TokenCounter>,
36    /// Whether to attempt structured `AnchoredSummary` output before prose.
37    pub structured_summaries: bool,
38    /// Optional callback invoked with the `AnchoredSummary` result and a `fallback` flag.
39    ///
40    /// Used by `zeph-core` to write debug dumps without the `SummarizationDeps` knowing
41    /// about `DebugDumper`. Pass `None` when debug dumps are not needed.
42    #[allow(clippy::type_complexity)]
43    pub on_anchored_summary: Option<Arc<dyn Fn(&AnchoredSummary, bool) + Send + Sync>>,
44}
45
46/// Attempt structured summarization via `chat_typed_erased::<AnchoredSummary>()`.
47///
48/// Returns `Ok(AnchoredSummary)` on success, `Err` when mandatory fields are missing
49/// or the LLM fails. The caller is responsible for falling back to prose on `Err`.
50///
51/// # Errors
52/// Returns [`zeph_llm::LlmError`] when the LLM call fails, times out, or the
53/// returned summary is incomplete.
54pub async fn summarize_structured(
55    deps: &SummarizationDeps,
56    messages: &[Message],
57    guidelines: &str,
58) -> Result<AnchoredSummary, zeph_llm::LlmError> {
59    let prompt = build_anchored_summary_prompt(messages, guidelines);
60    let msgs = [Message {
61        role: Role::User,
62        content: prompt,
63        parts: vec![],
64        metadata: MessageMetadata::default(),
65    }];
66    let summary: AnchoredSummary = tokio::time::timeout(
67        deps.llm_timeout,
68        deps.provider.chat_typed_erased::<AnchoredSummary>(&msgs),
69    )
70    .await
71    .map_err(|_| zeph_llm::LlmError::Timeout)??;
72
73    if !summary.files_modified.is_empty() && summary.decisions_made.is_empty() {
74        tracing::warn!("structured summary: decisions_made is empty");
75    } else if summary.files_modified.is_empty() {
76        tracing::warn!(
77            "structured summary: files_modified is empty (may be a pure discussion session)"
78        );
79    }
80
81    if !summary.is_complete() {
82        tracing::warn!(
83            session_intent_empty = summary.session_intent.trim().is_empty(),
84            next_steps_empty = summary.next_steps.is_empty(),
85            "structured summary incomplete: mandatory fields missing, falling back to prose"
86        );
87        return Err(zeph_llm::LlmError::Other(
88            "structured summary missing mandatory fields".into(),
89        ));
90    }
91
92    if let Err(msg) = summary.validate() {
93        tracing::warn!(
94            error = %msg,
95            "structured summary failed field validation, falling back to prose"
96        );
97        return Err(zeph_llm::LlmError::Other(msg));
98    }
99
100    Ok(summary)
101}
102
103/// Single-pass LLM summarization over a message slice.
104///
105/// # Errors
106/// Returns [`zeph_llm::LlmError`] when the LLM call fails or times out.
107pub async fn single_pass_summary(
108    deps: &SummarizationDeps,
109    messages: &[Message],
110    guidelines: &str,
111) -> Result<String, zeph_llm::LlmError> {
112    let prompt = build_chunk_prompt(messages, guidelines);
113    let msgs = [Message {
114        role: Role::User,
115        content: prompt,
116        parts: vec![],
117        metadata: MessageMetadata::default(),
118    }];
119    tokio::time::timeout(deps.llm_timeout, deps.provider.chat(&msgs))
120        .await
121        .map_err(|_| zeph_llm::LlmError::Timeout)?
122}
123
124/// Chunked multi-pass LLM summarization with bounded concurrency.
125///
126/// Splits `messages` into token-bounded chunks and summarizes each chunk with the LLM.
127/// Partial results are consolidated into a final summary. Falls back to single-pass on
128/// chunk failures or context length errors.
129///
130/// # Errors
131/// Returns [`zeph_llm::LlmError`] when all summarization attempts fail.
132#[allow(clippy::too_many_lines)]
133pub async fn summarize_with_llm(
134    deps: &SummarizationDeps,
135    messages: &[Message],
136    guidelines: &str,
137) -> Result<String, zeph_llm::LlmError> {
138    const CHUNK_TOKEN_BUDGET: usize = 4096;
139    const OVERSIZED_THRESHOLD: usize = CHUNK_TOKEN_BUDGET / 2;
140
141    let chunks = crate::slot::chunk_messages(
142        messages,
143        CHUNK_TOKEN_BUDGET,
144        OVERSIZED_THRESHOLD,
145        &deps.token_counter,
146    );
147
148    if chunks.len() <= 1 {
149        return single_pass_summary(deps, messages, guidelines).await;
150    }
151
152    // Summarize chunks with bounded concurrency to prevent runaway API calls
153    let provider = deps.provider.clone();
154    let guidelines_owned = guidelines.to_string();
155    let timeout = deps.llm_timeout;
156    let results: Vec<_> = futures::stream::iter(chunks.into_iter().map(|chunk| {
157        let guidelines_ref = guidelines_owned.clone();
158        let prompt = build_chunk_prompt(&chunk, &guidelines_ref);
159        let p = provider.clone();
160        async move {
161            tokio::time::timeout(
162                timeout,
163                p.chat(&[Message {
164                    role: Role::User,
165                    content: prompt,
166                    parts: vec![],
167                    metadata: MessageMetadata::default(),
168                }]),
169            )
170            .await
171            .map_err(|_| zeph_llm::LlmError::Timeout)?
172        }
173    }))
174    .buffer_unordered(4)
175    .collect()
176    .await;
177
178    let partial_summaries: Vec<String> = results
179        .into_iter()
180        .collect::<Result<Vec<_>, zeph_llm::LlmError>>()
181        .unwrap_or_else(|e| {
182            tracing::warn!(
183                "chunked compaction: one or more chunks failed: {e:#}, falling back to single-pass"
184            );
185            Vec::new()
186        });
187
188    if partial_summaries.is_empty() {
189        return single_pass_summary(deps, messages, guidelines).await;
190    }
191
192    // Consolidate partial summaries
193    let numbered = {
194        let cap: usize = partial_summaries.iter().map(|s| s.len() + 8).sum();
195        let mut buf = String::with_capacity(cap);
196        for (i, s) in partial_summaries.iter().enumerate() {
197            if i > 0 {
198                buf.push_str("\n\n");
199            }
200            let _ = write!(buf, "{}. {s}", i + 1);
201        }
202        buf
203    };
204
205    if deps.structured_summaries {
206        let anchored_prompt = format!(
207            "<analysis>\n\
208             Merge these partial conversation summaries into a single structured summary.\n\
209             </analysis>\n\
210             \n\
211             Produce a JSON object with exactly these 5 fields:\n\
212             - session_intent: string — what the user is trying to accomplish\n\
213             - files_modified: string[] — file paths, function names, structs touched\n\
214             - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
215             - open_questions: string[] — unresolved questions or blockers\n\
216             - next_steps: string[] — concrete next actions\n\
217             \n\
218             Partial summaries:\n{numbered}"
219        );
220        let anchored_msgs = [Message {
221            role: Role::User,
222            content: anchored_prompt,
223            parts: vec![],
224            metadata: MessageMetadata::default(),
225        }];
226        match tokio::time::timeout(
227            timeout,
228            deps.provider
229                .chat_typed_erased::<AnchoredSummary>(&anchored_msgs),
230        )
231        .await
232        {
233            Ok(Ok(anchored)) if anchored.is_complete() => {
234                if let Some(ref cb) = deps.on_anchored_summary {
235                    cb(&anchored, false);
236                }
237                return Ok(crate::slot::cap_summary(anchored.to_markdown(), 16_000));
238            }
239            Ok(Ok(anchored)) => {
240                tracing::warn!(
241                    "chunked consolidation: structured summary incomplete, falling back to prose"
242                );
243                if let Some(ref cb) = deps.on_anchored_summary {
244                    cb(&anchored, true);
245                }
246            }
247            Ok(Err(e)) => {
248                tracing::warn!(error = %e, "chunked consolidation: structured output failed, falling back to prose");
249            }
250            Err(_) => {
251                tracing::warn!(
252                    "chunked consolidation: structured output timed out, falling back to prose"
253                );
254            }
255        }
256    }
257
258    let consolidation_prompt = format!(
259        "<analysis>\n\
260         Merge these partial conversation summaries into a single structured compaction note.\n\
261         Produce exactly these 9 sections covering all partial summaries:\n\
262         1. User Intent\n2. Technical Concepts\n3. Files & Code\n4. Errors & Fixes\n\
263         5. Problem Solving\n6. User Messages\n7. Pending Tasks\n8. Current Work\n9. Next Step\n\
264         </analysis>\n\n\
265         Partial summaries:\n{numbered}"
266    );
267
268    let consolidation_msgs = [Message {
269        role: Role::User,
270        content: consolidation_prompt,
271        parts: vec![],
272        metadata: MessageMetadata::default(),
273    }];
274    tokio::time::timeout(timeout, deps.provider.chat(&consolidation_msgs))
275        .await
276        .map_err(|_| zeph_llm::LlmError::Timeout)?
277}
278
279/// Build a prose summarization prompt from a message slice and optional guidelines.
280///
281/// The returned string is suitable for sending to an LLM as a `User` message.
282/// Guidelines are injected inside `<compression-guidelines>` XML tags when non-empty.
283///
284/// # Examples
285///
286/// ```no_run
287/// use zeph_context::summarization::build_chunk_prompt;
288/// let prompt = build_chunk_prompt(&[], "be concise");
289/// assert!(prompt.contains("compression-guidelines"));
290/// ```
291#[must_use]
292pub fn build_chunk_prompt(messages: &[Message], guidelines: &str) -> String {
293    let history_text = format_history(messages);
294
295    let guidelines_section = guidelines_xml(guidelines);
296
297    format!(
298        "<analysis>\n\
299         Analyze this conversation and produce a structured compaction note for self-consumption.\n\
300         This note replaces the original messages in your context window — be thorough.\n\
301         Longer is better if it preserves actionable detail.\n\
302         </analysis>\n\
303         {guidelines_section}\n\
304         Produce exactly these 9 sections:\n\
305         1. User Intent — what the user is ultimately trying to accomplish\n\
306         2. Technical Concepts — key technologies, patterns, constraints discussed\n\
307         3. Files & Code — file paths, function names, structs, enums touched or relevant\n\
308         4. Errors & Fixes — every error encountered and whether/how it was resolved\n\
309         5. Problem Solving — approaches tried, decisions made, alternatives rejected\n\
310         6. User Messages — verbatim user requests that are still pending or relevant\n\
311         7. Pending Tasks — items explicitly promised or left TODO\n\
312         8. Current Work — the exact task in progress at the moment of compaction\n\
313         9. Next Step — the single most important action to take immediately after compaction\n\
314         \n\
315         Conversation:\n{history_text}"
316    )
317}
318
319/// Build a structured JSON summarization prompt for `AnchoredSummary` output.
320///
321/// The returned string is suitable for sending to an LLM as a `User` message.
322/// Guidelines are injected inside `<compression-guidelines>` XML tags when non-empty.
323///
324/// # Examples
325///
326/// ```no_run
327/// use zeph_context::summarization::build_anchored_summary_prompt;
328/// let prompt = build_anchored_summary_prompt(&[], "");
329/// assert!(prompt.contains("session_intent"));
330/// ```
331#[must_use]
332pub fn build_anchored_summary_prompt(messages: &[Message], guidelines: &str) -> String {
333    let history_text = format_history(messages);
334    let guidelines_section = guidelines_xml(guidelines);
335
336    format!(
337        "<analysis>\n\
338         You are compacting a conversation into a structured summary for self-consumption.\n\
339         This summary replaces the original messages in your context window.\n\
340         Every field MUST be populated — empty fields mean lost information.\n\
341         </analysis>\n\
342         {guidelines_section}\n\
343         Produce a JSON object with exactly these 5 fields:\n\
344         - session_intent: string — what the user is trying to accomplish\n\
345         - files_modified: string[] — file paths, function names, structs touched\n\
346         - decisions_made: string[] — each entry: \"Decision: X — Reason: Y\"\n\
347         - open_questions: string[] — unresolved questions or blockers\n\
348         - next_steps: string[] — concrete next actions\n\
349         \n\
350         Be thorough. Preserve all file paths, line numbers, error messages, \
351         and specific identifiers — they cannot be recovered.\n\
352         \n\
353         Conversation:\n{history_text}"
354    )
355}
356
357/// Build a last-resort metadata summary without calling the LLM.
358///
359/// Used when LLM summarization repeatedly fails. The result records message counts
360/// and truncated previews of the last user and assistant messages.
361#[must_use]
362pub fn build_metadata_summary(messages: &[Message], truncate: fn(&str, usize) -> String) -> String {
363    let mut user_count = 0usize;
364    let mut assistant_count = 0usize;
365    let mut system_count = 0usize;
366    let mut last_user = String::new();
367    let mut last_assistant = String::new();
368
369    for m in messages {
370        match m.role {
371            Role::User => {
372                user_count += 1;
373                if !m.content.is_empty() {
374                    last_user.clone_from(&m.content);
375                }
376            }
377            Role::Assistant => {
378                assistant_count += 1;
379                if !m.content.is_empty() {
380                    last_assistant.clone_from(&m.content);
381                }
382            }
383            Role::System => system_count += 1,
384        }
385    }
386
387    let last_user_preview = truncate(&last_user, 200);
388    let last_assistant_preview = truncate(&last_assistant, 200);
389
390    format!(
391        "[metadata summary — LLM compaction unavailable]\n\
392         Messages compacted: {} ({} user, {} assistant, {} system)\n\
393         Last user message: {last_user_preview}\n\
394         Last assistant message: {last_assistant_preview}",
395        messages.len(),
396        user_count,
397        assistant_count,
398        system_count,
399    )
400}
401
402/// Build a summarization prompt for a single tool-call pair.
403///
404/// The returned string is suitable for sending to an LLM as a `User` message.
405#[must_use]
406pub fn build_tool_pair_summary_prompt(req: &Message, res: &Message) -> String {
407    format!(
408        "Produce a concise but technically precise summary of this tool invocation.\n\
409         Preserve all facts that would be needed to continue work without re-running the tool:\n\
410         - Tool name and key input parameters (file paths, function names, patterns, line ranges)\n\
411         - Exact findings: line numbers, struct/enum/function names, error messages, numeric values\n\
412         - Outcome: what was found, changed, created, or confirmed\n\
413         Do NOT omit specific identifiers, paths, or numbers — they cannot be recovered later.\n\
414         Use 2-4 sentences maximum.\n\n\
415         <tool_request>\n{}\n</tool_request>\n\n<tool_response>\n{}\n</tool_response>",
416        req.content, res.content
417    )
418}
419
420/// Remove a fraction of tool-response messages from a conversation using a middle-out strategy.
421///
422/// `fraction` is in range `(0.0, 1.0]` — fraction of tool responses to replace with compact
423/// references. Tool outputs that have an overflow UUID are replaced with a `read_overflow`
424/// hint; others become `[compacted]`.
425///
426/// Returns the modified message list.
427#[allow(
428    clippy::cast_precision_loss,
429    clippy::cast_possible_truncation,
430    clippy::cast_sign_loss,
431    clippy::cast_possible_wrap
432)]
433#[must_use]
434pub fn remove_tool_responses_middle_out(mut messages: Vec<Message>, fraction: f32) -> Vec<Message> {
435    let tool_indices: Vec<usize> = messages
436        .iter()
437        .enumerate()
438        .filter(|(_, m)| {
439            m.parts.iter().any(|p| {
440                matches!(
441                    p,
442                    MessagePart::ToolResult { .. } | MessagePart::ToolOutput { .. }
443                )
444            })
445        })
446        .map(|(i, _)| i)
447        .collect();
448
449    if tool_indices.is_empty() {
450        return messages;
451    }
452
453    let n = tool_indices.len();
454    let to_remove = ((n as f32 * fraction).ceil() as usize).min(n);
455
456    let center = n / 2;
457    let mut remove_set: Vec<usize> = Vec::with_capacity(to_remove);
458    let mut left = center as isize - 1;
459    let mut right = center;
460    let mut count = 0;
461
462    while count < to_remove {
463        if right < n {
464            remove_set.push(tool_indices[right]);
465            count += 1;
466            right += 1;
467        }
468        if count < to_remove && left >= 0 {
469            let idx = left as usize;
470            if !remove_set.contains(&tool_indices[idx]) {
471                remove_set.push(tool_indices[idx]);
472                count += 1;
473            }
474        }
475        left -= 1;
476        if left < 0 && right >= n {
477            break;
478        }
479    }
480
481    for &msg_idx in &remove_set {
482        let msg = &mut messages[msg_idx];
483        for part in &mut msg.parts {
484            match part {
485                MessagePart::ToolResult { content, .. } => {
486                    let ref_notice = extract_overflow_ref(content).map_or_else(
487                        || String::from("[compacted]"),
488                        |uuid| {
489                            format!("[tool output pruned; use read_overflow {uuid} to retrieve]")
490                        },
491                    );
492                    *content = ref_notice;
493                }
494                MessagePart::ToolOutput {
495                    body, compacted_at, ..
496                } => {
497                    if compacted_at.is_none() {
498                        let ref_notice = extract_overflow_ref(body)
499                            .map(|uuid| {
500                                format!(
501                                    "[tool output pruned; use read_overflow {uuid} to retrieve]"
502                                )
503                            })
504                            .unwrap_or_default();
505                        *body = ref_notice;
506                        *compacted_at = Some(
507                            std::time::SystemTime::now()
508                                .duration_since(std::time::UNIX_EPOCH)
509                                .unwrap_or_default()
510                                .as_secs()
511                                .cast_signed(),
512                        );
513                    }
514                }
515                _ => {}
516            }
517        }
518        msg.rebuild_content();
519    }
520    messages
521}
522
523/// Extract the overflow UUID from a tool output body, if present.
524///
525/// The overflow notice has the format:
526/// `\n[full output stored — ID: {uuid} — {bytes} bytes, use read_overflow tool to retrieve]`
527///
528/// Returns the UUID substring on success, or `None` if the notice is absent.
529#[must_use]
530pub fn extract_overflow_ref(body: &str) -> Option<&str> {
531    let start = body.find(OVERFLOW_NOTICE_PREFIX)?;
532    let rest = &body[start + OVERFLOW_NOTICE_PREFIX.len()..];
533    let end = rest.find(" \u{2014} ")?;
534    Some(&rest[..end])
535}
536
537fn format_history(messages: &[Message]) -> String {
538    let estimated_len: usize = messages
539        .iter()
540        .map(|m| "[assistant]: ".len() + m.content.len() + 2)
541        .sum();
542    let mut history_text = String::with_capacity(estimated_len);
543    for (i, m) in messages.iter().enumerate() {
544        if i > 0 {
545            history_text.push_str("\n\n");
546        }
547        let role = match m.role {
548            Role::User => "user",
549            Role::Assistant => "assistant",
550            Role::System => "system",
551        };
552        let _ = write!(history_text, "[{role}]: {}", m.content);
553    }
554    history_text
555}
556
/// Wrap non-empty guidelines in `<compression-guidelines>` XML tags.
///
/// Returns an empty string for empty input so prompt templates can splice
/// the section in unconditionally.
fn guidelines_xml(guidelines: &str) -> String {
    match guidelines {
        "" => String::new(),
        text => format!("\n<compression-guidelines>\n{text}\n</compression-guidelines>\n"),
    }
}
564
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::{Message, MessageMetadata, Role};

    // Fixture: plain user message with no tool parts.
    fn user_msg(content: &str) -> Message {
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![],
            metadata: MessageMetadata::default(),
        }
    }

    // Fixture: plain assistant message with no tool parts.
    fn assistant_msg(content: &str) -> Message {
        Message {
            role: Role::Assistant,
            content: content.to_string(),
            parts: vec![],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn build_chunk_prompt_includes_guidelines_section() {
        let msgs = [user_msg("hello")];
        let prompt = build_chunk_prompt(&msgs, "be concise");
        assert!(
            prompt.contains("<compression-guidelines>"),
            "prompt must include guidelines XML"
        );
        assert!(
            prompt.contains("be concise"),
            "prompt must embed the guidelines text"
        );
    }

    #[test]
    fn build_chunk_prompt_no_guidelines_omits_section() {
        // Empty guidelines must suppress the whole XML section, not emit empty tags.
        let prompt = build_chunk_prompt(&[], "");
        assert!(
            !prompt.contains("<compression-guidelines>"),
            "empty guidelines must not produce the XML section"
        );
    }

    #[test]
    fn build_anchored_summary_prompt_contains_json_fields() {
        // The structured prompt must name the AnchoredSummary JSON fields.
        let prompt = build_anchored_summary_prompt(&[], "");
        assert!(prompt.contains("session_intent"));
        assert!(prompt.contains("files_modified"));
        assert!(prompt.contains("next_steps"));
    }

    #[test]
    fn build_metadata_summary_counts_messages() {
        let msgs = [user_msg("hi"), assistant_msg("hello"), user_msg("bye")];
        // Truncation callback: simple char-count prefix.
        let summary = build_metadata_summary(&msgs, |s, n| s.chars().take(n).collect());
        assert!(summary.contains("3 (2 user, 1 assistant, 0 system)"));
    }

    #[test]
    fn build_tool_pair_summary_prompt_contains_request_and_response() {
        let req = user_msg("req content");
        let res = assistant_msg("res content");
        let prompt = build_tool_pair_summary_prompt(&req, &res);
        assert!(prompt.contains("req content"));
        assert!(prompt.contains("res content"));
    }

    #[test]
    fn extract_overflow_ref_returns_uuid_when_present() {
        // \u{2014} is the em dash used by the overflow notice format.
        let uuid = "550e8400-e29b-41d4-a716-446655440000";
        let body =
            format!("some output\n[full output stored \u{2014} ID: {uuid} \u{2014} 12345 bytes]");
        assert_eq!(extract_overflow_ref(&body), Some(uuid));
    }

    #[test]
    fn extract_overflow_ref_returns_none_when_absent() {
        assert_eq!(extract_overflow_ref("normal output"), None);
    }

    // Fixture: message carrying a ToolUse + ToolResult pair, as produced after
    // a tool invocation round-trip.
    fn tool_result_msg(content: &str) -> Message {
        use zeph_llm::provider::MessagePart;
        Message {
            role: Role::User,
            content: content.to_string(),
            parts: vec![
                MessagePart::ToolUse {
                    id: "t1".into(),
                    name: "bash".into(),
                    input: serde_json::Value::Null,
                },
                MessagePart::ToolResult {
                    tool_use_id: "t1".into(),
                    content: content.to_string(),
                    is_error: false,
                },
            ],
            metadata: MessageMetadata::default(),
        }
    }

    #[test]
    fn remove_tool_responses_middle_out_clears_correct_fraction() {
        // 4 tool messages, fraction=0.5 → ceil(4*0.5)=2 must be replaced with [compacted]
        let mut messages = vec![
            tool_result_msg("out0"),
            tool_result_msg("out1"),
            tool_result_msg("out2"),
            tool_result_msg("out3"),
        ];
        messages = remove_tool_responses_middle_out(messages, 0.5);

        let compacted_count = messages
            .iter()
            .flat_map(|m| m.parts.iter())
            .filter(|p| {
                if let zeph_llm::provider::MessagePart::ToolResult { content, .. } = p {
                    content == "[compacted]"
                } else {
                    false
                }
            })
            .count();

        assert_eq!(
            compacted_count, 2,
            "ceil(4 * 0.5) = 2 tool results must be replaced with [compacted]"
        );
    }

    #[test]
    fn remove_tool_responses_middle_out_no_tool_messages_returns_unchanged() {
        // Messages without tool parts must pass through untouched.
        let messages = vec![user_msg("hello"), assistant_msg("hi")];
        let result = remove_tool_responses_middle_out(messages.clone(), 0.5);
        assert_eq!(result.len(), messages.len());
        assert!(
            result.iter().all(|m| m.parts.is_empty()),
            "non-tool messages must be unchanged"
        );
    }
}