tower_llm/auto_compaction/
mod.rs

//! Auto-compaction layer for managing conversation context length
//!
//! This module provides automatic conversation compaction when context limits are reached.
//! It can work reactively (on error) or proactively (when approaching limits).
//!
//! # Features
//! - Reactive compaction on context length errors
//! - Proactive compaction based on token thresholds
//! - Configurable compaction strategies
//! - Custom compaction prompts
//! - Integration with existing provider infrastructure
//!
//! # Example
//!
//! ```no_run
//! use tower_llm::{Agent, auto_compaction::{CompactionPolicy, CompactionStrategy, ProactiveThreshold}};
//! use std::sync::Arc;
//! use async_openai::{Client, config::OpenAIConfig};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
//! let client = Arc::new(Client::<OpenAIConfig>::new());
//!
//! // Configure compaction to trigger at 1000 tokens
//! let policy = CompactionPolicy {
//!     compaction_model: "gpt-4o-mini".to_string(),
//!     proactive_threshold: Some(ProactiveThreshold {
//!         token_threshold: 1000,
//!         percentage_threshold: None,
//!     }),
//!     compaction_strategy: CompactionStrategy::PreserveSystemAndRecent { recent_count: 5 },
//!     ..Default::default()
//! };
//!
//! let agent = Agent::builder(client)
//!     .model("gpt-4o")
//!     .auto_compaction(policy)
//!     .build();
//! # Ok(())
//! # }
//! ```

42use std::future::Future;
43use std::pin::Pin;
44use std::sync::Arc;
45
46use async_openai::types::{
47    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs,
48    ChatCompletionRequestToolMessageArgs, ChatCompletionRequestUserMessageArgs,
49    CreateChatCompletionRequest, CreateChatCompletionRequestArgs,
50};
51use tower::{BoxError, Layer, Service, ServiceExt};
52
53use crate::core::StepOutcome;
54use crate::provider::ModelService;
55
56// ===== Core Types =====
57
/// Strategy for handling orphaned tool calls during compaction
///
/// An "orphaned" tool call is an assistant message whose `tool_calls` lack a
/// matching tool-response message later in the conversation; compacting such
/// a message naively would produce an invalid request message sequence.
#[derive(Clone, Debug, Default)]
pub enum OrphanedToolCallStrategy {
    /// Drop orphaned tool calls before compaction and re-append after (default)
    #[default]
    DropAndReappend,
    /// Exclude messages with orphaned tool calls from compaction range
    ExcludeFromCompaction,
    /// Add placeholder tool responses for orphaned calls
    AddPlaceholderResponses,
    /// Fail compaction if orphaned tool calls are detected
    FailOnOrphaned,
}
76
/// Configuration for auto-compaction behavior
///
/// Controls which model performs summarization, when compaction triggers
/// (reactively on context-length errors, and optionally proactively via
/// [`ProactiveThreshold`]), which messages are summarized, and how orphaned
/// tool calls are resolved before summarization.
#[derive(Clone)]
pub struct CompactionPolicy {
    /// Model to use for compaction (e.g., "gpt-4o-mini")
    pub compaction_model: String,

    /// Optional: Proactive threshold for triggering compaction
    ///
    /// When `None`, compaction is only attempted reactively.
    pub proactive_threshold: Option<ProactiveThreshold>,

    /// Strategy for selecting messages to compact
    pub compaction_strategy: CompactionStrategy,

    /// Prompt to use for compaction
    pub compaction_prompt: CompactionPrompt,

    /// Maximum retries for compaction attempts
    // NOTE(review): the retry loop is not visible in this module section —
    // confirm where this limit is enforced.
    pub max_compaction_attempts: usize,

    /// Strategy for handling orphaned tool calls
    pub orphaned_tool_call_strategy: OrphanedToolCallStrategy,
}
98
99impl Default for CompactionPolicy {
100    fn default() -> Self {
101        Self {
102            compaction_model: "gpt-4o-mini".to_string(),
103            proactive_threshold: None,
104            compaction_strategy: CompactionStrategy::PreserveSystemAndRecent { recent_count: 10 },
105            compaction_prompt: CompactionPrompt::Default,
106            max_compaction_attempts: 2,
107            orphaned_tool_call_strategy: OrphanedToolCallStrategy::default(),
108        }
109    }
110}
111
/// Threshold configuration for proactive compaction
#[derive(Clone, Debug)]
pub struct ProactiveThreshold {
    /// Token count at which to trigger compaction
    pub token_threshold: usize,

    /// Alternative: percentage of model's context window (0.0-1.0)
    // NOTE(review): how this interacts with `token_threshold` when both are
    // set is decided by consuming code outside this section — confirm there.
    pub percentage_threshold: Option<f32>,
}
121
/// Strategy for selecting which messages to compact
///
/// Each variant resolves to a half-open [`CompactionRange`] over the
/// conversation's message list.
#[derive(Clone)]
pub enum CompactionStrategy {
    /// Compact all messages except the last N
    CompactAllButLast(usize),

    /// Compact messages older than N turns (user+assistant pairs)
    ///
    /// A "turn" is counted by user messages when scanning from the end.
    CompactOlderThan(usize),

    /// Keep system prompt and last N messages, compact the middle
    PreserveSystemAndRecent { recent_count: usize },

    /// Custom function to select which messages to compact
    Custom(CompactionRangeFn),
}
137
/// Range of messages to compact
///
/// A half-open index range `[start, end)` into a message slice. An empty
/// range (`start >= end`) means there is nothing to compact.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CompactionRange {
    /// Start index (inclusive)
    pub start: usize,
    /// End index (exclusive)
    pub end: usize,
}
146
/// Type alias for custom compaction range function
///
/// Given the full message list, returns the half-open range of messages that
/// should be summarized.
pub type CompactionRangeFn =
    Arc<dyn Fn(&[ChatCompletionRequestMessage]) -> CompactionRange + Send + Sync>;

/// Type alias for dynamic prompt generation function
///
/// Given the messages selected for compaction, returns the prompt text used
/// to instruct the summarization model.
pub type PromptGeneratorFn = Arc<dyn Fn(&[ChatCompletionRequestMessage]) -> String + Send + Sync>;
153
/// Compaction prompt configuration
///
/// Determines the system-prompt text sent to the compaction model.
#[derive(Clone)]
pub enum CompactionPrompt {
    /// Use default prompt optimized for conversation summarization
    Default,

    /// Custom static prompt
    Custom(String),

    /// Dynamic prompt based on messages being compacted
    Dynamic(PromptGeneratorFn),
}
166
167impl CompactionPrompt {
168    fn generate(&self, messages: &[ChatCompletionRequestMessage]) -> String {
169        match self {
170            CompactionPrompt::Default => {
171                "Please provide a concise summary of the conversation above, preserving:\n\
172                1. The user's original intent and requirements\n\
173                2. Key decisions, conclusions, and action items\n\
174                3. Important context, constraints, and technical details\n\
175                4. Any errors or issues encountered and their resolutions\n\
176                5. Current state of any ongoing tasks\n\n\
177                Format the summary as a clear narrative that maintains conversation continuity."
178                    .to_string()
179            }
180            CompactionPrompt::Custom(prompt) => prompt.clone(),
181            CompactionPrompt::Dynamic(f) => f(messages),
182        }
183    }
184}
185
/// Token counter trait for estimating message tokens
///
/// Implementations return an *estimate*; exact tokenizer parity is not
/// required (presumably used for proactive-threshold checks — the consuming
/// code is outside this section).
pub trait TokenCounter: Send + Sync {
    /// Estimate the total token count across `messages`.
    fn count_messages(&self, messages: &[ChatCompletionRequestMessage]) -> usize;
}
190
/// Simple token counter that estimates based on character count
///
/// This is a heuristic, not a real tokenizer: it divides the total character
/// count by a fixed characters-per-token ratio.
#[derive(Clone)]
pub struct SimpleTokenCounter {
    // Average number of characters assumed per token.
    chars_per_token: f32,
}

impl SimpleTokenCounter {
    /// Create a counter using the default ratio of 4 characters per token,
    /// a common rough estimate for English text.
    pub fn new() -> Self {
        Self::with_chars_per_token(4.0)
    }

    /// Create a counter with a custom characters-per-token ratio.
    ///
    /// Allows tuning the estimate for other languages or tokenizers.
    pub fn with_chars_per_token(chars_per_token: f32) -> Self {
        Self { chars_per_token }
    }
}

impl Default for SimpleTokenCounter {
    fn default() -> Self {
        Self::new()
    }
}
210
211impl TokenCounter for SimpleTokenCounter {
212    fn count_messages(&self, messages: &[ChatCompletionRequestMessage]) -> usize {
213        let total_chars: usize = messages
214            .iter()
215            .map(|msg| {
216                // Estimate character count based on message type
217                match msg {
218                    ChatCompletionRequestMessage::System(m) => {
219                        match &m.content {
220                            async_openai::types::ChatCompletionRequestSystemMessageContent::Text(t) => t.len(),
221                            async_openai::types::ChatCompletionRequestSystemMessageContent::Array(_) => 100, // Rough estimate
222                        }
223                    }
224                    ChatCompletionRequestMessage::User(m) => {
225                        match &m.content {
226                            async_openai::types::ChatCompletionRequestUserMessageContent::Text(t) => t.len(),
227                            async_openai::types::ChatCompletionRequestUserMessageContent::Array(_) => 100,
228                        }
229                    }
230                    ChatCompletionRequestMessage::Assistant(m) => {
231                        // Assistant messages have optional content
232                        m.content.as_ref().map(|c| match c {
233                            async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(t) => t.len(),
234                            async_openai::types::ChatCompletionRequestAssistantMessageContent::Array(_) => 100,
235                        }).unwrap_or(0)
236                    }
237                    ChatCompletionRequestMessage::Tool(m) => {
238                        // Tool messages have content enum
239                        match &m.content {
240                            async_openai::types::ChatCompletionRequestToolMessageContent::Text(t) => t.len(),
241                            async_openai::types::ChatCompletionRequestToolMessageContent::Array(_) => 100,
242                        }
243                    }
244                    ChatCompletionRequestMessage::Function(m) => {
245                        m.content.as_ref().map(|c| c.len()).unwrap_or(0)
246                    }
247                    ChatCompletionRequestMessage::Developer(m) => {
248                        match &m.content {
249                            async_openai::types::ChatCompletionRequestDeveloperMessageContent::Text(t) => t.len(),
250                            async_openai::types::ChatCompletionRequestDeveloperMessageContent::Array(_) => 100,
251                        }
252                    }
253                }
254            })
255            .sum();
256
257        (total_chars as f32 / self.chars_per_token) as usize
258    }
259}
260
261// ===== Layer Implementation =====
262
/// Layer that adds auto-compaction to a Step service
///
/// `P` is the provider used to run the summarization request; `C` is the
/// [`TokenCounter`] used for estimates.
pub struct AutoCompactionLayer<P, C> {
    /// Compaction configuration applied to every wrapped service.
    policy: CompactionPolicy,
    /// Provider cloned into each produced service.
    provider: P,
    /// Shared token counter (one instance across all produced services).
    token_counter: Arc<C>,
}
269
270impl<P, C> AutoCompactionLayer<P, C> {
271    pub fn new(policy: CompactionPolicy, provider: P, token_counter: C) -> Self {
272        Self {
273            policy,
274            provider,
275            token_counter: Arc::new(token_counter),
276        }
277    }
278}
279
/// Service wrapper that performs auto-compaction
///
/// The inner service and provider are behind `Arc<tokio::sync::Mutex<_>>` so
/// the wrapper can be shared across async tasks while serializing access.
pub struct AutoCompaction<S, P, C> {
    /// The wrapped Step service.
    pub(crate) inner: Arc<tokio::sync::Mutex<S>>,
    /// Compaction configuration.
    pub(crate) policy: CompactionPolicy,
    /// Provider used to execute the summarization request.
    pub(crate) provider: Arc<tokio::sync::Mutex<P>>,
    /// Token counter used for estimates.
    pub(crate) token_counter: Arc<C>,
}
287
288impl<S, P, C> Layer<S> for AutoCompactionLayer<P, C>
289where
290    P: Clone,
291    C: Clone,
292{
293    type Service = AutoCompaction<S, P, C>;
294
295    fn layer(&self, inner: S) -> Self::Service {
296        AutoCompaction {
297            inner: Arc::new(tokio::sync::Mutex::new(inner)),
298            policy: self.policy.clone(),
299            provider: Arc::new(tokio::sync::Mutex::new(self.provider.clone())),
300            token_counter: self.token_counter.clone(),
301        }
302    }
303}
304
impl<S, P, C> AutoCompaction<S, P, C>
where
    P: ModelService + Send + 'static,
    P::Future: Send + 'static,
    C: TokenCounter + 'static,
{
    /// Find all orphaned tool calls in the conversation
    ///
    /// Returns the indices of assistant messages that carry `tool_calls` for
    /// which not every call id has a matching `Tool` response later in the
    /// list. The search for a response stops at the next assistant message,
    /// since by then the conversation has moved on.
    fn find_orphaned_tool_calls(messages: &[ChatCompletionRequestMessage]) -> Vec<usize> {
        let mut orphaned_indices = Vec::new();

        for (i, msg) in messages.iter().enumerate() {
            if let ChatCompletionRequestMessage::Assistant(asst) = msg {
                if let Some(tool_calls) = &asst.tool_calls {
                    if !tool_calls.is_empty() {
                        // Check if all tool calls have corresponding responses
                        let mut all_calls_have_responses = true;
                        for tool_call in tool_calls {
                            let mut found_response = false;
                            // Look for a tool response with matching ID after this message
                            for subsequent_msg in messages.iter().skip(i + 1) {
                                if let ChatCompletionRequestMessage::Tool(tool_msg) = subsequent_msg
                                {
                                    if tool_msg.tool_call_id == tool_call.id {
                                        found_response = true;
                                        break;
                                    }
                                }
                                // Stop looking if we hit another assistant message (conversation moved on)
                                if matches!(
                                    subsequent_msg,
                                    ChatCompletionRequestMessage::Assistant(_)
                                ) {
                                    break;
                                }
                            }
                            if !found_response {
                                all_calls_have_responses = false;
                                break;
                            }
                        }

                        if !all_calls_have_responses {
                            orphaned_indices.push(i);
                        }
                    }
                }
            }
        }

        orphaned_indices
    }

    /// Check if messages have orphaned tool calls (tool calls without responses)
    fn has_orphaned_tool_calls(messages: &[ChatCompletionRequestMessage]) -> bool {
        !Self::find_orphaned_tool_calls(messages).is_empty()
    }

    /// Remove all orphaned tool call messages and return them separately
    ///
    /// Returns `(cleaned_messages, orphaned)` where each orphan is paired
    /// with its original index so relative order can be restored later.
    fn extract_all_orphaned_tool_calls(
        messages: Vec<ChatCompletionRequestMessage>,
    ) -> (
        Vec<ChatCompletionRequestMessage>,
        Vec<(usize, ChatCompletionRequestMessage)>,
    ) {
        let orphaned_indices = Self::find_orphaned_tool_calls(&messages);

        if orphaned_indices.is_empty() {
            return (messages, Vec::new());
        }

        let mut cleaned = Vec::new();
        let mut orphaned = Vec::new();

        for (i, msg) in messages.into_iter().enumerate() {
            if orphaned_indices.contains(&i) {
                orphaned.push((i, msg));
            } else {
                cleaned.push(msg);
            }
        }

        (cleaned, orphaned)
    }

    /// Compact messages based on the configured strategy
    ///
    /// High-level flow:
    /// 1. Resolve orphaned tool calls per `orphaned_tool_call_strategy`.
    /// 2. Select a `[start, end)` range via `compaction_strategy`, extending
    ///    `end` so an assistant's trailing tool responses are not split off.
    /// 3. Build a summarization request (compaction prompt as system message
    ///    plus the selected messages) and call the compaction model.
    /// 4. Splice the returned summary (as a user message) between the
    ///    preserved prefix and suffix, re-append any dropped orphans, and
    ///    normalize assistant messages (drop empties, coalesce adjacent).
    ///
    /// # Errors
    /// Fails if request building or the model call fails, or if orphaned
    /// tool calls are present under `FailOnOrphaned`.
    pub(crate) async fn compact_messages(
        &self,
        messages: Vec<ChatCompletionRequestMessage>,
    ) -> Result<Vec<ChatCompletionRequestMessage>, BoxError> {
        // Handle orphaned tool calls based on strategy
        let (messages_to_compact, orphaned_tool_calls) = if Self::has_orphaned_tool_calls(&messages)
        {
            match self.policy.orphaned_tool_call_strategy {
                OrphanedToolCallStrategy::DropAndReappend => {
                    let orphaned_count = Self::find_orphaned_tool_calls(&messages).len();
                    tracing::debug!("Detected {} orphaned tool call message(s), dropping and will re-append after compaction", orphaned_count);
                    Self::extract_all_orphaned_tool_calls(messages)
                }
                OrphanedToolCallStrategy::ExcludeFromCompaction => {
                    tracing::debug!("Detected orphaned tool calls, excluding from compaction");
                    // For this strategy, we keep orphaned messages but exclude them from compaction
                    // This is handled by keeping all messages and adjusting the range
                    (messages, Vec::new())
                }
                OrphanedToolCallStrategy::AddPlaceholderResponses => {
                    tracing::debug!("Detected orphaned tool calls, adding placeholder responses");
                    // Add placeholder tool responses for all orphaned tool calls
                    let mut fixed_messages = messages.clone();
                    let orphaned_indices = Self::find_orphaned_tool_calls(&messages);

                    // Process in reverse order to maintain indices
                    for &idx in orphaned_indices.iter().rev() {
                        if let ChatCompletionRequestMessage::Assistant(asst) = &messages[idx] {
                            if let Some(tool_calls) = &asst.tool_calls {
                                // Insert placeholder responses right after the assistant message
                                let mut placeholders = Vec::new();
                                for tool_call in tool_calls {
                                    let placeholder =
                                        ChatCompletionRequestToolMessageArgs::default()
                                            .content("[Pending tool execution]")
                                            .tool_call_id(&tool_call.id)
                                            .build()?;
                                    placeholders.push(placeholder.into());
                                }
                                // Insert all placeholders after the assistant message
                                for (i, placeholder) in placeholders.into_iter().enumerate() {
                                    fixed_messages.insert(idx + 1 + i, placeholder);
                                }
                            }
                        }
                    }
                    (fixed_messages, Vec::new())
                }
                OrphanedToolCallStrategy::FailOnOrphaned => {
                    let orphaned_count = Self::find_orphaned_tool_calls(&messages).len();
                    return Err(format!(
                        "Cannot compact: conversation has {} orphaned tool call message(s)",
                        orphaned_count
                    )
                    .into());
                }
            }
        } else {
            (messages, Vec::new())
        };

        // Determine which messages to compact based on strategy
        let range = match &self.policy.compaction_strategy {
            CompactionStrategy::CompactAllButLast(n) => {
                let start = 0;
                let end = messages_to_compact.len().saturating_sub(*n);
                CompactionRange { start, end }
            }
            CompactionStrategy::CompactOlderThan(turns) => {
                // Count back N user+assistant pairs
                // (turns are identified by user messages scanning from the end)
                let mut turn_count = 0;
                let mut cutoff = messages_to_compact.len();
                for (i, msg) in messages_to_compact.iter().enumerate().rev() {
                    if matches!(msg, ChatCompletionRequestMessage::User(_)) {
                        turn_count += 1;
                        if turn_count >= *turns {
                            cutoff = i;
                            break;
                        }
                    }
                }
                CompactionRange {
                    start: 0,
                    end: cutoff,
                }
            }
            CompactionStrategy::PreserveSystemAndRecent { recent_count } => {
                // Find first non-system message
                let start = messages_to_compact
                    .iter()
                    .position(|m| !matches!(m, ChatCompletionRequestMessage::System(_)))
                    .unwrap_or(0);
                let end = messages_to_compact.len().saturating_sub(*recent_count);
                CompactionRange {
                    start,
                    // end.max(start) keeps the range valid (empty) for short conversations
                    end: end.max(start),
                }
            }
            CompactionStrategy::Custom(f) => f(&messages_to_compact),
        };

        // If nothing to compact, return original with orphaned tool calls if any
        if range.start >= range.end {
            let mut result = messages_to_compact;
            if !orphaned_tool_calls.is_empty() {
                // Re-append orphaned messages
                for (_, orphaned_msg) in orphaned_tool_calls {
                    result.push(orphaned_msg);
                }
            }
            return Ok(result);
        }

        // Extract messages to compact, extending end to include trailing tool responses
        // so an assistant's tool_calls are never separated from their responses.
        let mut adjusted_end = range.end;
        if adjusted_end > range.start {
            if let ChatCompletionRequestMessage::Assistant(asst) =
                &messages_to_compact[adjusted_end - 1]
            {
                if let Some(tool_calls) = &asst.tool_calls {
                    let ids: std::collections::HashSet<&str> =
                        tool_calls.iter().map(|tc| tc.id.as_str()).collect();
                    while adjusted_end < messages_to_compact.len() {
                        if let ChatCompletionRequestMessage::Tool(t) =
                            &messages_to_compact[adjusted_end]
                        {
                            if ids.contains(t.tool_call_id.as_str()) {
                                adjusted_end += 1;
                                continue;
                            }
                        }
                        break;
                    }
                }
            }
        }
        let to_compact = &messages_to_compact[range.start..adjusted_end];
        let prompt = self.policy.compaction_prompt.generate(to_compact);

        // Build compaction request
        let mut builder = CreateChatCompletionRequestArgs::default();
        builder.model(&self.policy.compaction_model);

        // Start with a system prompt
        let mut compact_messages = vec![ChatCompletionRequestSystemMessageArgs::default()
            .content(prompt)
            .build()?
            .into()];

        // Identify the last assistant-with-tool_calls inside the slice
        let mut last_asst_rel_idx: Option<usize> = None;
        for (i, msg) in to_compact.iter().enumerate() {
            if let ChatCompletionRequestMessage::Assistant(asst) = msg {
                if asst
                    .tool_calls
                    .as_ref()
                    .map(|v| !v.is_empty())
                    .unwrap_or(false)
                {
                    last_asst_rel_idx = Some(i);
                }
            }
        }

        if let Some(rel_idx) = last_asst_rel_idx {
            // Gather tool ids from the last assistant
            let abs_last = range.start + rel_idx;
            let mut remaining_ids: std::collections::HashSet<String> =
                if let ChatCompletionRequestMessage::Assistant(asst) =
                    &messages_to_compact[abs_last]
                {
                    asst.tool_calls
                        .as_ref()
                        .map(|tc| tc.iter().map(|t| t.id.clone()).collect())
                        .unwrap_or_else(std::collections::HashSet::new)
                } else {
                    std::collections::HashSet::new()
                };

            // Phase 1: push messages up to and including the assistant (skip leading system)
            // The original system prompt is skipped because the compaction
            // prompt already occupies the system slot.
            let mut pushed_non_system = false;
            for msg in &to_compact[..=rel_idx] {
                if !pushed_non_system {
                    if matches!(msg, ChatCompletionRequestMessage::System(_)) {
                        continue;
                    } else {
                        pushed_non_system = true;
                    }
                }
                compact_messages.push(msg.clone());
            }

            // Phase 2: scan forward beyond the slice for matching tool responses, skipping non-tool messages
            let mut extra_tools: Vec<ChatCompletionRequestMessage> = Vec::new();
            if !remaining_ids.is_empty() {
                let mut idx = abs_last + 1;
                while idx < messages_to_compact.len() && !remaining_ids.is_empty() {
                    match &messages_to_compact[idx] {
                        ChatCompletionRequestMessage::Tool(t) => {
                            if remaining_ids.contains(&t.tool_call_id) {
                                extra_tools.push(messages_to_compact[idx].clone());
                                remaining_ids.remove(&t.tool_call_id);
                            }
                        }
                        ChatCompletionRequestMessage::Assistant(_) => {
                            break; // stop at next assistant
                        }
                        _ => {
                            // skip non-tool messages
                        }
                    }
                    idx += 1;
                }
            }

            // Append extra tool responses (contiguously after the assistant)
            compact_messages.extend(extra_tools);

            // Add instruction to summarize if needed
            // (requests should end on a user turn for the summarization call)
            let last_is_user = matches!(
                compact_messages.last(),
                Some(ChatCompletionRequestMessage::User(_))
            );
            if !last_is_user {
                compact_messages.push(
                    ChatCompletionRequestUserMessageArgs::default()
                        .content("Please provide a summary of the above conversation following the instructions.")
                        .build()?
                        .into(),
                );
            }

            builder.messages(compact_messages);
        } else {
            // Fallback: no assistant with tool_calls inside the slice; include the slice as-is (skipping leading system)
            let mut pushed_non_system = false;
            for msg in to_compact {
                if !pushed_non_system {
                    if matches!(msg, ChatCompletionRequestMessage::System(_)) {
                        continue;
                    } else {
                        pushed_non_system = true;
                    }
                }
                compact_messages.push(msg.clone());
            }

            let last_is_user = matches!(
                compact_messages.last(),
                Some(ChatCompletionRequestMessage::User(_))
            );
            if !last_is_user {
                compact_messages.push(
                    ChatCompletionRequestUserMessageArgs::default()
                        .content("Please provide a summary of the above conversation following the instructions.")
                        .build()?
                        .into(),
                );
            }

            builder.messages(compact_messages);
        }
        let compact_req = builder.build()?;

        // Call compaction model
        // (provider lock is dropped immediately after the call completes)
        let mut provider = self.provider.lock().await;
        let response = ServiceExt::ready(&mut *provider)
            .await?
            .call(compact_req)
            .await?;
        drop(provider);

        // Build new message list with compacted summary
        let mut result = Vec::new();

        // Keep messages before the compacted range
        for msg in &messages_to_compact[..range.start] {
            result.push(msg.clone());
        }

        // Add the compacted summary as a user message to maintain role validity
        // NOTE: if the model returned no content, the summary is silently
        // omitted and only prefix + suffix remain.
        if let Some(summary) = response.assistant.content {
            result.push(
                ChatCompletionRequestUserMessageArgs::default()
                    .content(format!("[Previous conversation summary]: {}", summary))
                    .build()?
                    .into(),
            );
        }

        // Keep messages after the compacted range
        for msg in &messages_to_compact[adjusted_end..] {
            result.push(msg.clone());
        }

        // Re-append orphaned tool calls if we dropped them earlier
        if !orphaned_tool_calls.is_empty() {
            tracing::debug!(
                "Re-appending {} orphaned tool call message(s) after compaction",
                orphaned_tool_calls.len()
            );

            // Sort by original index to maintain relative order
            let mut sorted_orphaned = orphaned_tool_calls;
            sorted_orphaned.sort_by_key(|(idx, _)| *idx);

            // For simplicity, append all orphaned messages at the end
            // This maintains the conversation flow while ensuring all tool calls are preserved
            for (_, orphaned_msg) in sorted_orphaned {
                result.push(orphaned_msg);
            }
        }

        // Normalize: drop empty assistant messages and coalesce adjacent assistants
        let mut normalized: Vec<ChatCompletionRequestMessage> = Vec::with_capacity(result.len());
        for msg in result.into_iter() {
            if let Some(ChatCompletionRequestMessage::Assistant(prev)) = normalized.last_mut() {
                if let ChatCompletionRequestMessage::Assistant(curr) = &msg {
                    let curr_empty = matches!(&curr.content, Some(async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(t)) if t.is_empty());
                    // Only merge when neither assistant carries tool calls.
                    let no_calls = prev
                        .tool_calls
                        .as_ref()
                        .map(|v| v.is_empty())
                        .unwrap_or(true)
                        && curr
                            .tool_calls
                            .as_ref()
                            .map(|v| v.is_empty())
                            .unwrap_or(true);
                    if curr_empty && no_calls {
                        // skip pushing empty assistant
                        continue;
                    }
                    if no_calls {
                        let prev_text = match &prev.content {
                            Some(async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(t)) => Some(t.clone()),
                            _ => None,
                        };
                        let curr_text = match &curr.content {
                            Some(async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(t)) => Some(t.clone()),
                            _ => None,
                        };
                        if let (Some(p), Some(c)) = (prev_text, curr_text) {
                            let combined = if p.is_empty() {
                                c
                            } else if c.is_empty() {
                                p
                            } else {
                                format!("{}\n\n{}", p, c)
                            };
                            prev.content = Some(async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(combined));
                            continue;
                        }
                    }
                }
            }

            // default push (dropping empty assistants)
            if let ChatCompletionRequestMessage::Assistant(a) = &msg {
                let is_empty = matches!(&a.content, Some(async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(t)) if t.is_empty())
                    && a.tool_calls.as_ref().map(|v| v.is_empty()).unwrap_or(true);
                if is_empty {
                    continue;
                }
            }
            normalized.push(msg);
        }

        Ok(normalized)
    }

    /// Check if the error is a context length error
    ///
    /// Substring heuristic over the lowercased error text; matches OpenAI's
    /// `context_length_exceeded` code plus common phrasings. Presumably used
    /// by the reactive path in `call` — that code is outside this section.
    fn is_context_length_error(error: &BoxError) -> bool {
        let error_str = error.to_string().to_lowercase();
        error_str.contains("context_length_exceeded")
            || error_str.contains("context length")
            || error_str.contains("maximum context")
            || error_str.contains("token limit")
    }
}
770
// Tower `Service` implementation: wraps an inner step service and transparently
// compacts the conversation, proactively (token threshold) and/or reactively
// (retrying after a context-length error from the inner service).
impl<S, P, C> Service<CreateChatCompletionRequest> for AutoCompaction<S, P, C>
where
    S: Service<CreateChatCompletionRequest, Response = StepOutcome, Error = BoxError>
        + Send
        + 'static,
    S::Future: Send + 'static,
    P: ModelService + Send + 'static,
    P::Future: Send + 'static,
    C: TokenCounter + 'static,
{
    type Response = StepOutcome;
    type Error = BoxError;
    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;

    // Always ready: the inner service's readiness is awaited under the lock
    // inside `call`, so no backpressure is surfaced here.
    fn poll_ready(
        &mut self,
        _cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Result<(), Self::Error>> {
        std::task::Poll::Ready(Ok(()))
    }

    fn call(&mut self, req: CreateChatCompletionRequest) -> Self::Future {
        // Clone the shared handles so the returned future is 'static and does
        // not borrow `self`.
        let inner = self.inner.clone();
        let policy = self.policy.clone();
        let provider = self.provider.clone();
        let token_counter = self.token_counter.clone();

        Box::pin(async move {
            let mut current_messages = req.messages.clone();
            // Counts reactive compaction retries; bounded by
            // `policy.max_compaction_attempts`.
            let mut attempts = 0;

            // Check for proactive compaction
            if let Some(threshold) = &policy.proactive_threshold {
                let token_count = token_counter.count_messages(&current_messages);

                if token_count > threshold.token_threshold {
                    // Proactively compact
                    // (a throwaway AutoCompaction is rebuilt from the cloned
                    // handles purely to reuse `compact_messages`).
                    let compactor = AutoCompaction {
                        inner: inner.clone(),
                        policy: policy.clone(),
                        provider: provider.clone(),
                        token_counter: token_counter.clone(),
                    };

                    match compactor.compact_messages(current_messages.clone()).await {
                        Ok(compacted) => {
                            current_messages = compacted;
                        }
                        Err(e) => {
                            // Log but don't fail - continue with original messages
                            tracing::warn!("Proactive compaction failed: {}", e);
                        }
                    }
                }
            }

            loop {
                // Build request with current messages.
                // NOTE(review): only model, messages, temperature, max_tokens,
                // max_completion_tokens and tools are carried over from the
                // original request; any other fields (e.g. tool_choice) are
                // dropped on rebuild — confirm this is intentional.
                let mut builder = CreateChatCompletionRequestArgs::default();
                builder.model(&req.model);
                builder.messages(current_messages.clone());
                if let Some(t) = req.temperature {
                    builder.temperature(t);
                }
                // max_tokens is deprecated upstream in favor of
                // max_completion_tokens; forwarded anyway for compatibility.
                #[allow(deprecated)]
                if let Some(mt) = req.max_tokens {
                    builder.max_tokens(mt);
                }
                if let Some(mct) = req.max_completion_tokens {
                    builder.max_completion_tokens(mct);
                }
                if let Some(tools) = req.tools.clone() {
                    builder.tools(tools);
                }
                let current_req = builder.build()?;

                // Try to call inner service. The async mutex is held across the
                // inner await, so calls through this layer are serialized.
                let mut guard = inner.lock().await;
                let result = ServiceExt::ready(&mut *guard)
                    .await?
                    .call(current_req)
                    .await;
                drop(guard);

                match result {
                    Ok(outcome) => return Ok(outcome),
                    Err(e) => {
                        // Check if it's a context length error
                        if Self::is_context_length_error(&e)
                            && attempts < policy.max_compaction_attempts
                        {
                            attempts += 1;

                            // Attempt reactive compaction
                            let compactor = AutoCompaction {
                                inner: inner.clone(),
                                policy: policy.clone(),
                                provider: provider.clone(),
                                token_counter: token_counter.clone(),
                            };

                            match compactor.compact_messages(current_messages.clone()).await {
                                Ok(compacted) => {
                                    tracing::info!(
                                        "Reactive compaction successful, retrying request"
                                    );
                                    current_messages = compacted;
                                    // Loop will retry with compacted messages
                                }
                                Err(compact_err) => {
                                    // Surface both failures so callers can diagnose.
                                    return Err(format!(
                                        "Context length error and compaction failed: original={}, compact={}",
                                        e, compact_err
                                    ).into());
                                }
                            }
                        } else {
                            // Not a context error, or retry budget exhausted.
                            return Err(e);
                        }
                    }
                }
            }
        })
    }
}
896
897#[cfg(test)]
898mod tests {
899    use super::*;
900    use crate::validation::gen;
901    use crate::validation::{validate_conversation, ValidationPolicy};
902    use async_openai::types::ChatCompletionRequestAssistantMessageArgs;
903    use proptest::prelude::*;
904    use std::sync::Arc as StdArc;
905    use tokio::sync::Mutex as AsyncMutex;
906    use tower::Service;
907
908    #[test]
909    fn test_simple_token_counter() {
910        let counter = SimpleTokenCounter::new();
911        let messages = vec![
912            ChatCompletionRequestSystemMessageArgs::default()
913                .content("You are a helpful assistant.")
914                .build()
915                .unwrap()
916                .into(),
917            ChatCompletionRequestUserMessageArgs::default()
918                .content("Hello, how are you?")
919                .build()
920                .unwrap()
921                .into(),
922        ];
923
924        let count = counter.count_messages(&messages);
925        assert!(count > 0);
926        // Rough estimate: ~50 chars / 4 = ~12 tokens
927        assert!((10..=20).contains(&count));
928    }
929
930    // =============================================================================================
931    // Boundary and slice-invariance tests for compaction preflight requests
932    // =============================================================================================
933
    /// Build a minimal AutoCompaction with a capturing provider used only for compact_messages
    ///
    /// The inner service is the unit type `()`: these tests exercise
    /// `compact_messages` directly (which talks only to `provider`), never the
    /// `Service` impl, so no real inner service is needed.
    fn make_compactor_with_policy(
        policy: CompactionPolicy,
        provider: CapturingProvider,
    ) -> AutoCompaction<(), CapturingProvider, SimpleTokenCounter> {
        AutoCompaction {
            // Placeholder; present only to satisfy the struct shape.
            inner: Arc::new(tokio::sync::Mutex::new(())),
            policy,
            provider: Arc::new(tokio::sync::Mutex::new(provider)),
            token_counter: Arc::new(SimpleTokenCounter::new()),
        }
    }
946
947    fn asst_with_tool_calls(ids: &[&str]) -> ChatCompletionRequestMessage {
948        use async_openai::types::{
949            ChatCompletionMessageToolCall, ChatCompletionToolType, FunctionCall,
950        };
951        let calls: Vec<ChatCompletionMessageToolCall> = ids
952            .iter()
953            .map(|id| ChatCompletionMessageToolCall {
954                id: (*id).to_string(),
955                r#type: ChatCompletionToolType::Function,
956                function: FunctionCall {
957                    name: "x".to_string(),
958                    arguments: "{}".to_string(),
959                },
960            })
961            .collect();
962        ChatCompletionRequestAssistantMessageArgs::default()
963            .content("")
964            .tool_calls(calls)
965            .build()
966            .unwrap()
967            .into()
968    }
969
970    fn tool_msg(id: &str, content: &str) -> ChatCompletionRequestMessage {
971        ChatCompletionRequestToolMessageArgs::default()
972            .tool_call_id(id)
973            .content(content)
974            .build()
975            .unwrap()
976            .into()
977    }
978
979    fn user_msg(text: &str) -> ChatCompletionRequestMessage {
980        ChatCompletionRequestUserMessageArgs::default()
981            .content(text)
982            .build()
983            .unwrap()
984            .into()
985    }
986
987    fn system_msg(text: &str) -> ChatCompletionRequestMessage {
988        ChatCompletionRequestSystemMessageArgs::default()
989            .content(text)
990            .build()
991            .unwrap()
992            .into()
993    }
994
    #[test]
    fn compaction_preflight_excludes_tools_at_slice_end_custom_end() {
        // TODO: tests_compaction_slice_end_missing_tools
        // Build a transcript acceptable under default validation (contiguity not enforced),
        // then select a slice that ends immediately after an assistant with tool_calls,
        // excluding its tool response. The compactor's slice-aware boundary handling must
        // still produce a valid preflight request for the summarization model.

        // Messages: [sys, user1, assistant(tc c1), user_interrupt, tool(c1)]
        let msgs = vec![
            system_msg("S"),
            user_msg("U1"),
            asst_with_tool_calls(&["c1"]),
            user_msg("interrupt"),
            tool_msg("c1", "{}"),
        ];

        // Policy: Custom compaction range to include up to (but not including) the interrupt user
        // so the slice ends with the assistant with tool_calls.
        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
        let range_select: CompactionRangeFn = Arc::new(|m| {
            // Find assistant with tool_calls index (expect index 2), set end = idx + 1
            let idx = m
                .iter()
                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
                .unwrap();
            CompactionRange {
                start: 1,
                end: idx + 1,
            }
        });
        let policy = CompactionPolicy {
            compaction_model: "gpt-4o-mini".into(),
            proactive_threshold: None,
            compaction_strategy: CompactionStrategy::Custom(range_select),
            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
            max_compaction_attempts: 1,
            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
        };

        // Capturing provider to inspect the preflight request sent to the model
        let provider = CapturingProvider::new();
        let provider_clone = provider.clone();
        let ac = make_compactor_with_policy(policy, provider_clone);

        let rt = tokio::runtime::Runtime::new().unwrap();
        let out = rt
            .block_on(async move { ac.compact_messages(msgs.clone()).await })
            .unwrap();

        // The preflight request must satisfy tool invariants even though the slice
        // ended right after the assistant's tool_calls (slice-aware repair).
        let req = rt
            .block_on(async move { provider.get().await })
            .expect("captured");
        let strict = ValidationPolicy {
            require_user_first: false,
            allow_repeated_roles: true, // allow tool blocks
            enforce_contiguous_tool_responses: true,
            ..Default::default()
        };
        assert!(
            validate_conversation(&req.messages, &strict).is_none(),
            "expected preflight to be valid when slice ends after assistant (slice-aware)"
        );

        // Post-compaction output exists (we're not asserting validity here)
        assert!(!out.is_empty());
    }
1062
1063    #[test]
1064    fn compaction_slice_starts_with_tool_message_custom_start() {
1065        // TODO: tests_compaction_slice_starts_with_tool
1066        // Construct a valid transcript, then select a slice that begins at a tool message.
1067        let msgs = vec![
1068            system_msg("S"),
1069            user_msg("U1"),
1070            asst_with_tool_calls(&["c1"]),
1071            tool_msg("c1", "{}"),
1072            user_msg("U2"),
1073        ];
1074
1075        // Custom range: start at the first Tool message index
1076        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
1077        let range_select: CompactionRangeFn = Arc::new(|m| {
1078            let start = m
1079                .iter()
1080                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Tool(_)))
1081                .unwrap();
1082            CompactionRange {
1083                start,
1084                end: start + 1,
1085            }
1086        });
1087        let policy = CompactionPolicy {
1088            compaction_model: "gpt-4o-mini".into(),
1089            proactive_threshold: None,
1090            compaction_strategy: CompactionStrategy::Custom(range_select),
1091            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
1092            max_compaction_attempts: 1,
1093            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
1094        };
1095
1096        let provider = CapturingProvider::new();
1097        let ac = make_compactor_with_policy(policy, provider.clone());
1098        let rt = tokio::runtime::Runtime::new().unwrap();
1099        let _ = rt
1100            .block_on(async move { ac.compact_messages(msgs).await })
1101            .unwrap();
1102        let req = rt
1103            .block_on(async move { provider.get().await })
1104            .expect("captured");
1105
1106        // Starting with a Tool message should be invalid (tool before assistant)
1107        let strict = ValidationPolicy {
1108            require_user_first: false,
1109            ..Default::default()
1110        };
1111        let violations = validate_conversation(&req.messages, &strict).expect("violations");
1112        assert!(
1113            violations.iter().any(|v| matches!(
1114                v.code,
1115                crate::validation::ViolationCode::ToolBeforeAssistant { .. }
1116            )),
1117            "expected ToolBeforeAssistant violation in preflight request"
1118        );
1119    }
1120
1121    #[test]
1122    fn compaction_appends_user_after_assistant_with_missing_tools() {
1123        // TODO: tests_compaction_appended_user_after_tools_missing
1124        // Similar to the first test, but assert the appended user instruction appears
1125        // immediately after an assistant with tool_calls when no tools are in-slice.
1126        let msgs = vec![
1127            system_msg("S"),
1128            user_msg("U1"),
1129            asst_with_tool_calls(&["c1"]),
1130            user_msg("interrupt"),
1131            tool_msg("c1", "{}"),
1132        ];
1133
1134        // Select range that ends right after assistant (excludes tool response and the interrupt)
1135        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
1136        let range_select: CompactionRangeFn = Arc::new(|m| {
1137            let idx = m
1138                .iter()
1139                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
1140                .unwrap();
1141            CompactionRange {
1142                start: 1,
1143                end: idx + 1,
1144            }
1145        });
1146        let policy = CompactionPolicy {
1147            compaction_model: "gpt-4o-mini".into(),
1148            proactive_threshold: None,
1149            compaction_strategy: CompactionStrategy::Custom(range_select),
1150            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
1151            max_compaction_attempts: 1,
1152            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
1153        };
1154
1155        let provider = CapturingProvider::new();
1156        let ac = make_compactor_with_policy(policy, provider.clone());
1157        let rt = tokio::runtime::Runtime::new().unwrap();
1158        let _ = rt
1159            .block_on(async move { ac.compact_messages(msgs).await })
1160            .unwrap();
1161        let req = rt
1162            .block_on(async move { provider.get().await })
1163            .expect("captured");
1164
1165        // The compactor appends a user instruction if the slice doesn't end with a user
1166        // We ended with assistant, so a user instruction should have been appended.
1167        assert!(
1168            matches!(
1169                req.messages.last(),
1170                Some(ChatCompletionRequestMessage::User(_))
1171            ),
1172            "expected appended user instruction at end of preflight"
1173        );
1174    }
1175
    #[test]
    fn compaction_boundary_extends_to_include_all_tool_responses_for_last_assistant() {
        // TODO: tests_compaction_multicall_boundary_complete
        // Assistant with two tool_calls followed by two tool responses; end is set at assistant,
        // compactor should extend to include both contiguous tool responses.
        let msgs = vec![
            system_msg("S"),
            user_msg("U1"),
            asst_with_tool_calls(&["c1", "c2"]),
            tool_msg("c1", "r1"),
            tool_msg("c2", "r2"),
            user_msg("U2"),
        ];

        // Cut at end = assistant index + 1; both tool responses initially fall
        // outside the selected range.
        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
        let range_select: CompactionRangeFn = Arc::new(|m| {
            let idx = m
                .iter()
                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
                .unwrap();
            CompactionRange {
                start: 1,
                end: idx + 1,
            }
        });
        let policy = CompactionPolicy {
            compaction_model: "gpt-4o-mini".into(),
            proactive_threshold: None,
            compaction_strategy: CompactionStrategy::Custom(range_select),
            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
            max_compaction_attempts: 1,
            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
        };

        let provider = CapturingProvider::new();
        let ac = make_compactor_with_policy(policy, provider.clone());
        let rt = tokio::runtime::Runtime::new().unwrap();
        let _ = rt
            .block_on(async move { ac.compact_messages(msgs).await })
            .unwrap();
        let req = rt
            .block_on(async move { provider.get().await })
            .expect("captured");

        // Expect both tool responses present (no violations under strict contiguity policy)
        let strict = ValidationPolicy {
            require_user_first: false,
            allow_repeated_roles: true,
            enforce_contiguous_tool_responses: true,
            ..Default::default()
        };
        // On failure, dump the policy knobs and the violations before panicking
        // so the mismatch is diagnosable from test output.
        if let Some(violations) = validate_conversation(&req.messages, &strict) {
            eprintln!(
                "strict policy: allow_repeated_roles={}, enforce_contiguous_tool_responses={}",
                strict.allow_repeated_roles, strict.enforce_contiguous_tool_responses
            );
            eprintln!("violations: {:?}", violations);
            panic!("expected preflight to be valid when contiguous tools exist");
        }
    }
1236
    #[test]
    fn compaction_global_ok_but_slice_orphaned_missing_tools() {
        // TODO: tests_compaction_global_vs_slice_orphan
        // The full transcript is valid (assistant tool_calls immediately followed by the
        // tool response), but the chosen slice excludes that tool response. Slice-aware
        // repair must still produce a valid preflight request.
        let full = vec![
            system_msg("S"),
            user_msg("U1"),
            asst_with_tool_calls(&["c1"]),
            tool_msg("c1", "{}"),
            user_msg("U2"),
        ];

        // Sanity check: the unsliced transcript passes baseline validation.
        let base_policy = ValidationPolicy {
            require_user_first: false,
            ..Default::default()
        };
        assert!(validate_conversation(&full, &base_policy).is_none());

        // Custom range excludes the tool response (end at assistant)
        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
        let range_select: CompactionRangeFn = Arc::new(|m| {
            let idx = m
                .iter()
                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
                .unwrap();
            CompactionRange {
                start: 1,
                end: idx + 1,
            }
        });
        let policy = CompactionPolicy {
            compaction_model: "gpt-4o-mini".into(),
            proactive_threshold: None,
            compaction_strategy: CompactionStrategy::Custom(range_select),
            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
            max_compaction_attempts: 1,
            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
        };

        let provider = CapturingProvider::new();
        let ac = make_compactor_with_policy(policy, provider.clone());
        let rt = tokio::runtime::Runtime::new().unwrap();
        let _ = rt
            .block_on(async move { ac.compact_messages(full).await })
            .unwrap();
        let req = rt
            .block_on(async move { provider.get().await })
            .expect("captured");

        // The preflight request must satisfy the strict contiguity policy despite the cut.
        let strict = ValidationPolicy {
            require_user_first: false,
            allow_repeated_roles: true,
            enforce_contiguous_tool_responses: true,
            ..Default::default()
        };
        assert!(validate_conversation(&req.messages, &strict).is_none());
    }
1295
1296    #[test]
1297    fn compaction_strategy_comparison_drop_vs_placeholder() {
1298        // TODO: tests_compaction_property_strategy_comparison
1299        // Same input/slice, compare DropAndReappend vs AddPlaceholderResponses
1300        let msgs = vec![
1301            system_msg("S"),
1302            user_msg("U1"),
1303            asst_with_tool_calls(&["c1"]),
1304            user_msg("interrupt"),
1305            tool_msg("c1", "{}"),
1306        ];
1307
1308        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
1309        let range_select: CompactionRangeFn = Arc::new(|m| {
1310            let idx = m
1311                .iter()
1312                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
1313                .unwrap();
1314            CompactionRange {
1315                start: 1,
1316                end: idx + 1,
1317            }
1318        });
1319
1320        let mk_pol = |strat| CompactionPolicy {
1321            compaction_model: "gpt-4o-mini".into(),
1322            proactive_threshold: None,
1323            compaction_strategy: CompactionStrategy::Custom(range_select.clone()),
1324            compaction_prompt: CompactionPrompt::Dynamic(range_fn.clone()),
1325            max_compaction_attempts: 1,
1326            orphaned_tool_call_strategy: strat,
1327        };
1328
1329        // DropAndReappend
1330        let prov1 = CapturingProvider::new();
1331        let ac1 = make_compactor_with_policy(
1332            mk_pol(OrphanedToolCallStrategy::DropAndReappend),
1333            prov1.clone(),
1334        );
1335        let rt = tokio::runtime::Runtime::new().unwrap();
1336        let _ = rt
1337            .block_on(async { ac1.compact_messages(msgs.clone()).await })
1338            .unwrap();
1339        let req1 = rt.block_on(async move { prov1.get().await }).unwrap();
1340
1341        // AddPlaceholderResponses
1342        let prov2 = CapturingProvider::new();
1343        let ac2 = make_compactor_with_policy(
1344            mk_pol(OrphanedToolCallStrategy::AddPlaceholderResponses),
1345            prov2.clone(),
1346        );
1347        let _ = rt
1348            .block_on(async { ac2.compact_messages(msgs.clone()).await })
1349            .unwrap();
1350        let req2 = rt.block_on(async move { prov2.get().await }).unwrap();
1351
1352        let strict = ValidationPolicy {
1353            require_user_first: false,
1354            allow_repeated_roles: true,
1355            enforce_contiguous_tool_responses: true,
1356            ..Default::default()
1357        };
1358        if validate_conversation(&req2.messages, &strict).is_some() {
1359            eprintln!(
1360                "placeholder violations: {:?}",
1361                validate_conversation(&req2.messages, &strict).unwrap()
1362            );
1363        }
1364        // With slice-aware boundary repair, both strategies should yield valid preflight
1365        assert!(
1366            validate_conversation(&req1.messages, &strict).is_none(),
1367            "DropAndReappend should be valid with slice-aware boundary repair"
1368        );
1369        assert!(
1370            validate_conversation(&req2.messages, &strict).is_none(),
1371            "AddPlaceholderResponses should be valid with slice-aware boundary repair"
1372        );
1373    }
1374
    #[test]
    fn compaction_on_merged_history_like_handoff_preflight_validity() {
        // TODO: tests_compaction_handoff_merged_preflight
        // Simulate a merged history (original + accumulated, as produced by a handoff)
        // where the slice cuts across a tool boundary, and verify the preflight request
        // still passes the strict contiguity policy.
        let merged = vec![
            system_msg("S"),
            user_msg("U1"),
            asst_with_tool_calls(&["c1"]),
            user_msg("interrupt"),
            tool_msg("c1", "{}"),
            user_msg("U2"),
        ];

        let range_fn: PromptGeneratorFn = Arc::new(|_m| "".to_string());
        let range_select: CompactionRangeFn = Arc::new(|m| {
            // Cut right after assistant (exclude interrupt/user and tools)
            let idx = m
                .iter()
                .position(|mm| matches!(mm, ChatCompletionRequestMessage::Assistant(_)))
                .unwrap();
            CompactionRange {
                start: 1,
                end: idx + 1,
            }
        });
        let policy = CompactionPolicy {
            compaction_model: "gpt-4o-mini".into(),
            proactive_threshold: None,
            compaction_strategy: CompactionStrategy::Custom(range_select),
            compaction_prompt: CompactionPrompt::Dynamic(range_fn),
            max_compaction_attempts: 1,
            orphaned_tool_call_strategy: OrphanedToolCallStrategy::DropAndReappend,
        };

        let provider = CapturingProvider::new();
        let ac = make_compactor_with_policy(policy, provider.clone());
        let rt = tokio::runtime::Runtime::new().unwrap();
        let _ = rt
            .block_on(async move { ac.compact_messages(merged).await })
            .unwrap();
        let req = rt
            .block_on(async move { provider.get().await })
            .expect("captured");

        let strict = ValidationPolicy {
            require_user_first: false,
            enforce_contiguous_tool_responses: true,
            ..Default::default()
        };
        // Slice-aware compaction must repair the cut at the tool boundary,
        // producing a preflight with no violations.
        assert!(validate_conversation(&req.messages, &strict).is_none());
    }
1428
1429    #[test]
1430    fn test_compaction_range_preserve_system() {
1431        let strategy = CompactionStrategy::PreserveSystemAndRecent { recent_count: 2 };
1432        let messages = vec![
1433            ChatCompletionRequestSystemMessageArgs::default()
1434                .content("System")
1435                .build()
1436                .unwrap()
1437                .into(),
1438            ChatCompletionRequestUserMessageArgs::default()
1439                .content("User1")
1440                .build()
1441                .unwrap()
1442                .into(),
1443            ChatCompletionRequestAssistantMessageArgs::default()
1444                .content("Assistant1")
1445                .build()
1446                .unwrap()
1447                .into(),
1448            ChatCompletionRequestUserMessageArgs::default()
1449                .content("User2")
1450                .build()
1451                .unwrap()
1452                .into(),
1453            ChatCompletionRequestAssistantMessageArgs::default()
1454                .content("Assistant2")
1455                .build()
1456                .unwrap()
1457                .into(),
1458        ];
1459
1460        let range = match strategy {
1461            CompactionStrategy::PreserveSystemAndRecent { recent_count } => {
1462                let start = messages
1463                    .iter()
1464                    .position(|m| !matches!(m, ChatCompletionRequestMessage::System(_)))
1465                    .unwrap_or(0);
1466                let end = messages.len().saturating_sub(recent_count);
1467                CompactionRange {
1468                    start,
1469                    end: end.max(start),
1470                }
1471            }
1472            _ => panic!("Wrong strategy"),
1473        };
1474
1475        assert_eq!(range.start, 1); // After system message
1476        assert_eq!(range.end, 3); // Keep last 2 messages
1477    }
1478
1479    #[test]
1480    fn test_compaction_strategy_compact_all_but_last() {
1481        let strategy = CompactionStrategy::CompactAllButLast(3);
1482        let messages: Vec<ChatCompletionRequestMessage> = vec![
1483            ChatCompletionRequestUserMessageArgs::default()
1484                .content("1")
1485                .build()
1486                .unwrap()
1487                .into(),
1488            ChatCompletionRequestAssistantMessageArgs::default()
1489                .content("2")
1490                .build()
1491                .unwrap()
1492                .into(),
1493            ChatCompletionRequestUserMessageArgs::default()
1494                .content("3")
1495                .build()
1496                .unwrap()
1497                .into(),
1498            ChatCompletionRequestAssistantMessageArgs::default()
1499                .content("4")
1500                .build()
1501                .unwrap()
1502                .into(),
1503            ChatCompletionRequestUserMessageArgs::default()
1504                .content("5")
1505                .build()
1506                .unwrap()
1507                .into(),
1508        ];
1509
1510        let range = match strategy {
1511            CompactionStrategy::CompactAllButLast(n) => {
1512                let start = 0;
1513                let end = messages.len().saturating_sub(n);
1514                CompactionRange { start, end }
1515            }
1516            _ => panic!("Wrong strategy"),
1517        };
1518
1519        assert_eq!(range.start, 0);
1520        assert_eq!(range.end, 2); // Keep last 3 messages
1521    }
1522
1523    #[test]
1524    fn test_compaction_prompt_default() {
1525        let prompt = CompactionPrompt::Default;
1526        let messages = vec![];
1527        let generated = prompt.generate(&messages);
1528        assert!(generated.contains("concise summary"));
1529        assert!(generated.contains("conversation continuity"));
1530    }
1531
1532    #[test]
1533    fn test_compaction_prompt_custom() {
1534        let prompt = CompactionPrompt::Custom("Custom prompt".to_string());
1535        let messages = vec![];
1536        let generated = prompt.generate(&messages);
1537        assert_eq!(generated, "Custom prompt");
1538    }
1539
1540    #[test]
1541    fn test_compaction_prompt_dynamic() {
1542        let prompt = CompactionPrompt::Dynamic(Arc::new(|msgs| {
1543            format!("Summarize {} messages", msgs.len())
1544        }));
1545        let messages = vec![ChatCompletionRequestUserMessageArgs::default()
1546            .content("test")
1547            .build()
1548            .unwrap()
1549            .into()];
1550        let generated = prompt.generate(&messages);
1551        assert_eq!(generated, "Summarize 1 messages");
1552    }
1553
1554    #[test]
1555    fn test_is_context_length_error() {
1556        let test_cases = vec![
1557            ("context_length_exceeded", true),
1558            ("Error: context length exceeded", true),
1559            ("Maximum context reached", true),
1560            ("Token limit exceeded", true),
1561            ("Some other error", false),
1562            ("Network timeout", false),
1563        ];
1564
1565        for (error_msg, expected) in test_cases {
1566            let error: BoxError = error_msg.into();
1567            assert_eq!(
1568                AutoCompaction::<DummyService, DummyProvider, SimpleTokenCounter>::is_context_length_error(&error),
1569                expected,
1570                "Failed for: {}",
1571                error_msg
1572            );
1573        }
1574    }
1575
1576    #[test]
1577    fn test_proactive_threshold() {
1578        let threshold = ProactiveThreshold {
1579            token_threshold: 1000,
1580            percentage_threshold: Some(0.8),
1581        };
1582
1583        assert_eq!(threshold.token_threshold, 1000);
1584        assert_eq!(threshold.percentage_threshold, Some(0.8));
1585    }
1586
1587    #[test]
1588    fn test_compaction_policy_default() {
1589        let policy = CompactionPolicy::default();
1590        assert_eq!(policy.compaction_model, "gpt-4o-mini");
1591        assert!(policy.proactive_threshold.is_none());
1592        assert_eq!(policy.max_compaction_attempts, 2);
1593        matches!(
1594            policy.compaction_strategy,
1595            CompactionStrategy::PreserveSystemAndRecent { .. }
1596        );
1597        matches!(
1598            policy.orphaned_tool_call_strategy,
1599            OrphanedToolCallStrategy::DropAndReappend
1600        );
1601    }
1602
1603    #[test]
1604    fn test_orphaned_tool_call_detection() {
1605        use async_openai::types::{
1606            ChatCompletionMessageToolCall, ChatCompletionRequestToolMessageArgs,
1607            ChatCompletionToolType, FunctionCall,
1608        };
1609
1610        // Create messages with an orphaned tool call
1611        let messages = vec![
1612            ChatCompletionRequestUserMessageArgs::default()
1613                .content("Please help me")
1614                .build()
1615                .unwrap()
1616                .into(),
1617            ChatCompletionRequestAssistantMessageArgs::default()
1618                .content("")
1619                .tool_calls(vec![ChatCompletionMessageToolCall {
1620                    id: "call_123".to_string(),
1621                    r#type: ChatCompletionToolType::Function,
1622                    function: FunctionCall {
1623                        name: "some_tool".to_string(),
1624                        arguments: "{}".to_string(),
1625                    },
1626                }])
1627                .build()
1628                .unwrap()
1629                .into(),
1630            // No tool response message - this is orphaned!
1631        ];
1632
1633        assert!(has_orphaned_tool_calls(&messages));
1634
1635        // Now add the tool response - no longer orphaned
1636        let mut messages_with_response = messages.clone();
1637        messages_with_response.push(
1638            ChatCompletionRequestToolMessageArgs::default()
1639                .content("Tool result")
1640                .tool_call_id("call_123")
1641                .build()
1642                .unwrap()
1643                .into(),
1644        );
1645
1646        assert!(!has_orphaned_tool_calls(&messages_with_response));
1647    }
1648
1649    #[derive(Clone)]
1650    struct CapturingProvider {
1651        captured: StdArc<AsyncMutex<Option<CreateChatCompletionRequest>>>,
1652    }
1653    impl CapturingProvider {
1654        fn new() -> Self {
1655            Self {
1656                captured: StdArc::new(AsyncMutex::new(None)),
1657            }
1658        }
1659        async fn get(&self) -> Option<CreateChatCompletionRequest> {
1660            self.captured.lock().await.clone()
1661        }
1662    }
1663    impl Service<CreateChatCompletionRequest> for CapturingProvider {
1664        type Response = crate::provider::ProviderResponse;
1665        type Error = BoxError;
1666        type Future = std::pin::Pin<
1667            Box<dyn std::future::Future<Output = Result<Self::Response, Self::Error>> + Send>,
1668        >;
1669        fn poll_ready(
1670            &mut self,
1671            _cx: &mut std::task::Context<'_>,
1672        ) -> std::task::Poll<Result<(), Self::Error>> {
1673            std::task::Poll::Ready(Ok(()))
1674        }
1675        fn call(&mut self, req: CreateChatCompletionRequest) -> Self::Future {
1676            let captured = self.captured.clone();
1677            Box::pin(async move {
1678                *captured.lock().await = Some(req);
1679                #[allow(deprecated)]
1680                let assistant = async_openai::types::ChatCompletionResponseMessage {
1681                    content: Some("summary".into()),
1682                    role: async_openai::types::Role::Assistant,
1683                    tool_calls: None,
1684                    refusal: None,
1685                    audio: None,
1686                    function_call: None,
1687                };
1688                Ok(crate::provider::ProviderResponse {
1689                    assistant,
1690                    prompt_tokens: 1,
1691                    completion_tokens: 1,
1692                })
1693            })
1694        }
1695    }
1696
1697    proptest! {
1698        #[test]
1699        fn compaction_preflight_and_post_valid_for_valid_inputs(msgs in gen::valid_conversation(gen::GeneratorConfig::default())) {
1700            let provider = CapturingProvider::new();
1701            let provider_clone = provider.clone();
1702            let policy = CompactionPolicy {
1703                compaction_model: "gpt-4o-mini".into(),
1704                compaction_strategy: CompactionStrategy::CompactAllButLast(1),
1705                orphaned_tool_call_strategy: OrphanedToolCallStrategy::AddPlaceholderResponses,
1706                ..Default::default()
1707            };
1708            let counter = SimpleTokenCounter::new();
1709            let dummy_inner = (); // unused by compact_messages
1710            let ac = AutoCompaction {
1711                inner: Arc::new(tokio::sync::Mutex::new(dummy_inner)),
1712                policy,
1713                provider: Arc::new(tokio::sync::Mutex::new(provider_clone)),
1714                token_counter: Arc::new(counter),
1715            };
1716            let rt = tokio::runtime::Runtime::new().unwrap();
1717            let out = rt.block_on(async move { ac.compact_messages(msgs.clone()).await.unwrap() });
1718            // Preflight validity: captured request sent to compactor must be valid
1719            let req = rt.block_on(async move { provider.get().await }).expect("captured");
1720            prop_assert!(validate_conversation(&req.messages, &ValidationPolicy::default()).is_none());
1721            // Post-compaction validity: output must be valid
1722            prop_assert!(validate_conversation(&out, &ValidationPolicy::default()).is_none());
1723        }
1724    }
1725
1726    #[test]
1727    fn test_multiple_orphaned_tool_calls_in_middle() {
1728        use async_openai::types::{
1729            ChatCompletionMessageToolCall, ChatCompletionToolType, FunctionCall,
1730        };
1731
1732        // Test case similar to the production error - orphaned tool calls in the middle
1733        let messages = vec![
1734            ChatCompletionRequestUserMessageArgs::default()
1735                .content("First user message")
1736                .build()
1737                .unwrap()
1738                .into(),
1739            ChatCompletionRequestAssistantMessageArgs::default()
1740                .content("First response")
1741                .build()
1742                .unwrap()
1743                .into(),
1744            // First orphaned tool call (in the middle)
1745            ChatCompletionRequestAssistantMessageArgs::default()
1746                .content("")
1747                .tool_calls(vec![ChatCompletionMessageToolCall {
1748                    id: "call_middle".to_string(),
1749                    r#type: ChatCompletionToolType::Function,
1750                    function: FunctionCall {
1751                        name: "middle_tool".to_string(),
1752                        arguments: "{}".to_string(),
1753                    },
1754                }])
1755                .build()
1756                .unwrap()
1757                .into(),
1758            // No tool response - orphaned!
1759            // Conversation continues...
1760            ChatCompletionRequestUserMessageArgs::default()
1761                .content("Continue conversation")
1762                .build()
1763                .unwrap()
1764                .into(),
1765            ChatCompletionRequestAssistantMessageArgs::default()
1766                .content("Continuing...")
1767                .build()
1768                .unwrap()
1769                .into(),
1770            // Another orphaned tool call at the end
1771            ChatCompletionRequestAssistantMessageArgs::default()
1772                .content("")
1773                .tool_calls(vec![ChatCompletionMessageToolCall {
1774                    id: "call_end".to_string(),
1775                    r#type: ChatCompletionToolType::Function,
1776                    function: FunctionCall {
1777                        name: "end_tool".to_string(),
1778                        arguments: "{}".to_string(),
1779                    },
1780                }])
1781                .build()
1782                .unwrap()
1783                .into(),
1784        ];
1785
1786        // Should detect both orphaned tool calls
1787        assert!(has_orphaned_tool_calls(&messages));
1788        let orphaned_indices = find_orphaned_tool_calls_test(&messages);
1789        assert_eq!(orphaned_indices.len(), 2);
1790        assert_eq!(orphaned_indices[0], 2); // Middle orphaned message
1791        assert_eq!(orphaned_indices[1], 5); // End orphaned message
1792
1793        // Extract orphaned messages
1794        let (cleaned, orphaned) = extract_orphaned_tool_calls(messages.clone());
1795        assert_eq!(cleaned.len(), 4); // 6 original - 2 orphaned = 4
1796        assert_eq!(orphaned.len(), 2);
1797
1798        // Verify the correct messages were extracted
1799        assert_eq!(orphaned[0].0, 2);
1800        assert_eq!(orphaned[1].0, 5);
1801    }
1802
1803    #[test]
1804    fn test_mixed_orphaned_and_valid_tool_calls() {
1805        use async_openai::types::{
1806            ChatCompletionMessageToolCall, ChatCompletionRequestToolMessageArgs,
1807            ChatCompletionToolType, FunctionCall,
1808        };
1809
1810        let messages = vec![
1811            ChatCompletionRequestUserMessageArgs::default()
1812                .content("User message")
1813                .build()
1814                .unwrap()
1815                .into(),
1816            // Valid tool call with response
1817            ChatCompletionRequestAssistantMessageArgs::default()
1818                .content("")
1819                .tool_calls(vec![ChatCompletionMessageToolCall {
1820                    id: "call_valid".to_string(),
1821                    r#type: ChatCompletionToolType::Function,
1822                    function: FunctionCall {
1823                        name: "valid_tool".to_string(),
1824                        arguments: "{}".to_string(),
1825                    },
1826                }])
1827                .build()
1828                .unwrap()
1829                .into(),
1830            ChatCompletionRequestToolMessageArgs::default()
1831                .content("Valid tool response")
1832                .tool_call_id("call_valid")
1833                .build()
1834                .unwrap()
1835                .into(),
1836            // Orphaned tool call
1837            ChatCompletionRequestAssistantMessageArgs::default()
1838                .content("")
1839                .tool_calls(vec![ChatCompletionMessageToolCall {
1840                    id: "call_orphaned".to_string(),
1841                    r#type: ChatCompletionToolType::Function,
1842                    function: FunctionCall {
1843                        name: "orphaned_tool".to_string(),
1844                        arguments: "{}".to_string(),
1845                    },
1846                }])
1847                .build()
1848                .unwrap()
1849                .into(),
1850            // No response for orphaned call
1851            ChatCompletionRequestUserMessageArgs::default()
1852                .content("Another message")
1853                .build()
1854                .unwrap()
1855                .into(),
1856        ];
1857
1858        assert!(has_orphaned_tool_calls(&messages));
1859        let orphaned_indices = find_orphaned_tool_calls_test(&messages);
1860        assert_eq!(orphaned_indices.len(), 1);
1861        assert_eq!(orphaned_indices[0], 3); // Only the orphaned one
1862
1863        let (cleaned, orphaned) = extract_orphaned_tool_calls(messages.clone());
1864        assert_eq!(cleaned.len(), 4); // Valid tool call and response remain
1865        assert_eq!(orphaned.len(), 1);
1866        assert_eq!(orphaned[0].0, 3);
1867    }
1868
1869    #[test]
1870    fn test_multiple_tool_calls_partial_orphaned() {
1871        use async_openai::types::{
1872            ChatCompletionMessageToolCall, ChatCompletionRequestToolMessageArgs,
1873            ChatCompletionToolType, FunctionCall,
1874        };
1875
1876        // Assistant message with multiple tool calls, only some have responses
1877        let messages = vec![
1878            ChatCompletionRequestUserMessageArgs::default()
1879                .content("User message")
1880                .build()
1881                .unwrap()
1882                .into(),
1883            ChatCompletionRequestAssistantMessageArgs::default()
1884                .content("")
1885                .tool_calls(vec![
1886                    ChatCompletionMessageToolCall {
1887                        id: "call_1".to_string(),
1888                        r#type: ChatCompletionToolType::Function,
1889                        function: FunctionCall {
1890                            name: "tool_1".to_string(),
1891                            arguments: "{}".to_string(),
1892                        },
1893                    },
1894                    ChatCompletionMessageToolCall {
1895                        id: "call_2".to_string(),
1896                        r#type: ChatCompletionToolType::Function,
1897                        function: FunctionCall {
1898                            name: "tool_2".to_string(),
1899                            arguments: "{}".to_string(),
1900                        },
1901                    },
1902                ])
1903                .build()
1904                .unwrap()
1905                .into(),
1906            // Only response for call_1, not call_2
1907            ChatCompletionRequestToolMessageArgs::default()
1908                .content("Response for call_1")
1909                .tool_call_id("call_1")
1910                .build()
1911                .unwrap()
1912                .into(),
1913            // call_2 is orphaned!
1914        ];
1915
1916        assert!(has_orphaned_tool_calls(&messages));
1917        let orphaned_indices = find_orphaned_tool_calls_test(&messages);
1918        assert_eq!(orphaned_indices.len(), 1);
1919        assert_eq!(orphaned_indices[0], 1); // The assistant message with partial orphaned calls
1920    }
1921
1922    #[test]
1923    fn test_orphaned_tool_call_strategy_drop_and_reappend() {
1924        use async_openai::types::{
1925            ChatCompletionMessageToolCall, ChatCompletionToolType, FunctionCall,
1926        };
1927
1928        let messages = vec![
1929            ChatCompletionRequestUserMessageArgs::default()
1930                .content("User message")
1931                .build()
1932                .unwrap()
1933                .into(),
1934            ChatCompletionRequestAssistantMessageArgs::default()
1935                .content("Assistant response")
1936                .build()
1937                .unwrap()
1938                .into(),
1939            ChatCompletionRequestUserMessageArgs::default()
1940                .content("Another user message")
1941                .build()
1942                .unwrap()
1943                .into(),
1944            ChatCompletionRequestAssistantMessageArgs::default()
1945                .content("")
1946                .tool_calls(vec![ChatCompletionMessageToolCall {
1947                    id: "orphaned_call".to_string(),
1948                    r#type: ChatCompletionToolType::Function,
1949                    function: FunctionCall {
1950                        name: "orphaned_tool".to_string(),
1951                        arguments: "{}".to_string(),
1952                    },
1953                }])
1954                .build()
1955                .unwrap()
1956                .into(),
1957        ];
1958
1959        let (cleaned, orphaned) = extract_orphaned_tool_calls(messages.clone());
1960
1961        // Should have removed the last message with tool calls
1962        assert_eq!(cleaned.len(), 3);
1963        assert_eq!(orphaned.len(), 1);
1964
1965        // The orphaned message should be the one with tool calls
1966        let (original_idx, orphaned_msg) = &orphaned[0];
1967        assert_eq!(*original_idx, 3); // The 4th message (index 3) was orphaned
1968        if let ChatCompletionRequestMessage::Assistant(asst) = orphaned_msg {
1969            assert!(asst.tool_calls.is_some());
1970        } else {
1971            panic!("Expected assistant message");
1972        }
1973    }
1974
1975    // Helper functions for testing
1976    fn has_orphaned_tool_calls(messages: &[ChatCompletionRequestMessage]) -> bool {
1977        // Use the actual implementation's logic
1978        !find_orphaned_tool_calls_test(messages).is_empty()
1979    }
1980
1981    fn find_orphaned_tool_calls_test(messages: &[ChatCompletionRequestMessage]) -> Vec<usize> {
1982        let mut orphaned_indices = Vec::new();
1983
1984        for (i, msg) in messages.iter().enumerate() {
1985            if let ChatCompletionRequestMessage::Assistant(asst) = msg {
1986                if let Some(tool_calls) = &asst.tool_calls {
1987                    if !tool_calls.is_empty() {
1988                        // Check if all tool calls have corresponding responses
1989                        let mut all_calls_have_responses = true;
1990                        for tool_call in tool_calls {
1991                            let mut found_response = false;
1992                            // Look for a tool response with matching ID after this message
1993                            for subsequent_msg in messages.iter().skip(i + 1) {
1994                                if let ChatCompletionRequestMessage::Tool(tool_msg) = subsequent_msg
1995                                {
1996                                    if tool_msg.tool_call_id == tool_call.id {
1997                                        found_response = true;
1998                                        break;
1999                                    }
2000                                }
2001                                // Stop looking if we hit another assistant message (conversation moved on)
2002                                if matches!(
2003                                    subsequent_msg,
2004                                    ChatCompletionRequestMessage::Assistant(_)
2005                                ) {
2006                                    break;
2007                                }
2008                            }
2009                            if !found_response {
2010                                all_calls_have_responses = false;
2011                                break;
2012                            }
2013                        }
2014
2015                        if !all_calls_have_responses {
2016                            orphaned_indices.push(i);
2017                        }
2018                    }
2019                }
2020            }
2021        }
2022
2023        orphaned_indices
2024    }
2025
2026    fn extract_orphaned_tool_calls(
2027        messages: Vec<ChatCompletionRequestMessage>,
2028    ) -> (
2029        Vec<ChatCompletionRequestMessage>,
2030        Vec<(usize, ChatCompletionRequestMessage)>,
2031    ) {
2032        let orphaned_indices = find_orphaned_tool_calls_test(&messages);
2033
2034        if orphaned_indices.is_empty() {
2035            return (messages, Vec::new());
2036        }
2037
2038        let mut cleaned = Vec::new();
2039        let mut orphaned = Vec::new();
2040
2041        for (i, msg) in messages.into_iter().enumerate() {
2042            if orphaned_indices.contains(&i) {
2043                orphaned.push((i, msg));
2044            } else {
2045                cleaned.push(msg);
2046            }
2047        }
2048
2049        (cleaned, orphaned)
2050    }
2051
    // Dummy types for testing
    // Zero-sized stand-ins used only to satisfy `AutoCompaction`'s type
    // parameters in tests that never actually drive the inner services.
    struct DummyService;
    struct DummyProvider;
2055
2056    impl Service<CreateChatCompletionRequest> for DummyService {
2057        type Response = crate::core::StepOutcome;
2058        type Error = BoxError;
2059        type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
2060
2061        fn poll_ready(
2062            &mut self,
2063            _: &mut std::task::Context<'_>,
2064        ) -> std::task::Poll<Result<(), Self::Error>> {
2065            std::task::Poll::Ready(Ok(()))
2066        }
2067
2068        fn call(&mut self, _: CreateChatCompletionRequest) -> Self::Future {
2069            Box::pin(async {
2070                Ok(crate::core::StepOutcome::Done {
2071                    messages: vec![],
2072                    aux: Default::default(),
2073                })
2074            })
2075        }
2076    }
2077
2078    impl Service<CreateChatCompletionRequest> for DummyProvider {
2079        type Response = crate::provider::ProviderResponse;
2080        type Error = BoxError;
2081        type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
2082
2083        fn poll_ready(
2084            &mut self,
2085            _: &mut std::task::Context<'_>,
2086        ) -> std::task::Poll<Result<(), Self::Error>> {
2087            std::task::Poll::Ready(Ok(()))
2088        }
2089
2090        fn call(&mut self, _: CreateChatCompletionRequest) -> Self::Future {
2091            Box::pin(async {
2092                #[allow(deprecated)]
2093                let assistant = async_openai::types::ChatCompletionResponseMessage {
2094                    content: Some("summary".to_string()),
2095                    role: async_openai::types::Role::Assistant,
2096                    tool_calls: None,
2097                    refusal: None,
2098                    audio: None,
2099                    function_call: None,
2100                };
2101                Ok(crate::provider::ProviderResponse {
2102                    assistant,
2103                    prompt_tokens: 10,
2104                    completion_tokens: 10,
2105                })
2106            })
2107        }
2108    }
2109}