Skip to main content

phi_core/context/
strategy.rs

1use super::compaction::*;
2use super::config::*;
3use super::token::*;
4use crate::types::*;
5use async_trait::async_trait;
6use chrono::Utc;
7#[allow(unused_imports)]
8use std::sync::Arc;
9
10// ---------------------------------------------------------------------------
11// Compaction strategy
12// ---------------------------------------------------------------------------
13
14/// Strategy for compacting messages when context exceeds budget.
15///
16/// Implement this to customize what happens during compaction:
17/// - Index discarded content into a memory store before removal
18/// - Apply custom preservation rules (e.g., always keep decisions)
19/// - Emit metadata about what was compressed
20///
21/// See the [Custom Compaction](https://LazyBouy.github.io/phi-core/concepts/agent-loop.html#custom-compaction)
22/// docs for examples.
23/*
24RUST QUIRK: Traits as seams for extensibility (the Strategy pattern)
25
26CompactionStrategy is a classic "strategy pattern" expressed as a Rust trait.
27The agent loop calls `strategy.compact(messages, config)` — it doesn't know
28whether it's calling DefaultCompaction or a custom user-provided strategy.
29
30This is polymorphism without inheritance:
31  - In OOP: you'd subclass a BaseCompaction class
32  - In Rust: you implement a trait
33
34The trait object `Arc<dyn CompactionStrategy>` in AgentLoopConfig means:
35"store any type that implements CompactionStrategy, dispatched at runtime."
36
37Why `Send + Sync` bounds?
38  - The agent loop may run on any tokio thread pool thread
39  - The strategy is shared via Arc, so it must be Sync (safe to &-reference across threads)
40  - It must be Send (safe to move to another thread)
41  - Basically: thread-safe is required because tokio = multi-threaded by default
42
43The `messages: Vec<AgentMessage>` parameter takes ownership (not a borrow).
44This is intentional: compaction rewrites the list. Passing by value lets the
45implementation freely mutate, filter, and reconstruct without cloning.
46*/
pub trait CompactionStrategy: Send + Sync {
    /// Compact messages to fit within the token budget defined by `config`.
    ///
    /// Called before each LLM turn when `context_config` is set.
    ///
    /// # Arguments
    /// * `messages` — the message list, passed by value so the implementation
    ///   owns it and may rewrite it without cloning.
    /// * `config` — borrowed compaction settings; the implementation only
    ///   reads them.
    ///
    /// # Returns
    /// The compacted message list as a new owned `Vec<AgentMessage>`.
    fn compact(
        &self,
        messages: Vec<AgentMessage>, // OWNED — taken by value so implementation can freely rewrite without cloning
        config: &ContextConfig, // SETTINGS — token budget, keep_first/keep_recent counts, tool_output_max_lines
    ) -> Vec<AgentMessage>;
}
57
58/// Default 3-level compaction: truncate tool outputs → summarize turns → drop middle.
59///
60/// This is used automatically when no custom `CompactionStrategy` is set.
61/// You can also compose it inside a custom strategy — run your logic first,
62/// then delegate to `compact_messages()` for the actual reduction.
63pub struct DefaultCompaction;
64
65impl CompactionStrategy for DefaultCompaction {
66    fn compact(
67        &self,
68        messages: Vec<AgentMessage>, // OWNED — passed directly to compact_messages()
69        config: &ContextConfig,      // SETTINGS — forwarded to compact_messages()
70    ) -> Vec<AgentMessage> {
71        super::compact_messages::compact_messages_with_counter(
72            messages,
73            config,
74            config.token_counter.as_ref(),
75        )
76    }
77}
78
79// ---------------------------------------------------------------------------
80// Block-based compaction strategy (non-destructive overlay model)
81// ---------------------------------------------------------------------------
82
83use crate::session::LoopRecord;
84
85/// Strategy for creating non-destructive `CompactionBlock` overlays.
86///
87/// Three methods produce the three sections of a `CompactionBlock`:
88/// - `keep_first`: turns kept verbatim from the start
89/// - `keep_recent`: recent turns with truncated tool outputs
90/// - `keep_compacted`: fully summarised section
91///
92/// The default `compact()` method assembles them. Override individual methods
93/// to customise specific sections (e.g. LLM-based summarisation for `keep_compacted`).
94///
95/// As of phi-core 0.9.0, all four methods are `async fn` (via `#[async_trait]`)
96/// so implementations can issue LLM calls inside `keep_compacted`/`keep_recent`
97/// without `block_in_place` workarounds. Sync implementations migrate by
98/// prepending `#[async_trait]` to the impl + `async` to each method —
99/// existing bodies need no changes if they don't `.await` anything.
100#[async_trait]
101pub trait BlockCompactionStrategy: Send + Sync {
102    /// Determine the keep_first section: turns kept verbatim from the start.
103    /// Only called for the most recent loop.
104    async fn keep_first(
105        &self,
106        record: &LoopRecord,
107        turn_map: &TurnMap,
108        config: &CompactionConfig,
109    ) -> Option<TurnRange>;
110
111    /// Create the keep_recent section: recent turns with truncated tool outputs.
112    /// Only called for the most recent loop.
113    async fn keep_recent(
114        &self,
115        record: &LoopRecord,
116        turn_map: &TurnMap,
117        config: &CompactionConfig,
118    ) -> Option<CompactedSection>;
119
120    /// Create the keep_compacted section: fully summarised turns.
121    /// For most recent loop: summarises the middle (between keep_first and keep_recent).
122    /// For older loops: summarises the entire loop.
123    ///
124    /// Implementations should aim to summarise ALL turns in the range within
125    /// `config.max_summary_tokens` — e.g. shorter per-turn summaries or an
126    /// LLM-generated holistic digest. The token budget is for the total output,
127    /// not a per-turn limit.
128    async fn keep_compacted(
129        &self,
130        record: &LoopRecord,
131        turn_map: &TurnMap,
132        config: &CompactionConfig,
133        is_most_recent: bool,
134    ) -> Option<CompactedSection>;
135
136    /// Assemble a `CompactionBlock` from the three sections.
137    /// Default implementation calls the three methods above.
138    async fn compact(
139        &self,
140        record: &LoopRecord,
141        config: &CompactionConfig,
142        is_most_recent: bool,
143    ) -> CompactionBlock {
144        let turn_map = TurnMap::from_messages(&record.messages);
145        let keep_first = if is_most_recent {
146            self.keep_first(record, &turn_map, config).await
147        } else {
148            None
149        };
150        let keep_recent = if is_most_recent {
151            self.keep_recent(record, &turn_map, config).await
152        } else {
153            None
154        };
155        let keep_compacted = self
156            .keep_compacted(record, &turn_map, config, is_most_recent)
157            .await;
158        CompactionBlock {
159            keep_first,
160            keep_recent,
161            keep_compacted,
162            created_at: Utc::now(),
163        }
164    }
165}
166
/// Default block-based compaction strategy.
///
/// A stateless unit struct — all parameters come from `CompactionConfig`.
/// Its `BlockCompactionStrategy` implementation builds the sections as follows:
/// - `keep_first`: returns the turn range that starts at turn 0 and covers
///   up to `keep_first_turns` turns (capped at the number of turns present)
/// - `keep_compacted`: one-liner `[Summary]` messages, one per assistant
///   message in the range, bounded in total by `max_summary_tokens`
/// - `keep_recent`: the last `keep_recent_turns` turns, with text content of
///   tool results truncated to `tool_output_max_lines`
pub struct DefaultBlockCompaction;
174
175#[async_trait]
176impl BlockCompactionStrategy for DefaultBlockCompaction {
177    async fn keep_first(
178        &self,
179        _record: &LoopRecord,
180        turn_map: &TurnMap,
181        config: &CompactionConfig,
182    ) -> Option<TurnRange> {
183        let total = turn_map.turn_count();
184        if total == 0 {
185            return None;
186        }
187        let end = (config.keep_first_turns as u32)
188            .min(total)
189            .saturating_sub(1);
190        Some(TurnRange {
191            start_turn: 0,
192            end_turn: end,
193        })
194    }
195
196    async fn keep_recent(
197        &self,
198        record: &LoopRecord,
199        turn_map: &TurnMap,
200        config: &CompactionConfig,
201    ) -> Option<CompactedSection> {
202        let total = turn_map.turn_count();
203        if total == 0 {
204            return None;
205        }
206        let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
207        let range = TurnRange {
208            start_turn: recent_start,
209            end_turn: total - 1,
210        };
211        let msgs = turn_map.messages_for_range(&range, &record.messages);
212        // Truncate tool outputs in the recent section
213        let truncated: Vec<AgentMessage> = msgs
214            .iter()
215            .map(|m| {
216                if let AgentMessage::Llm(lm) = m {
217                    if let Message::ToolResult {
218                        tool_call_id,
219                        tool_name,
220                        content,
221                        is_error,
222                        timestamp,
223                    } = &lm.message
224                    {
225                        let truncated_content: Vec<Content> = content
226                            .iter()
227                            .map(|c| match c {
228                                Content::Text { text } => Content::Text {
229                                    text: super::compact_messages::truncate_text_head_tail(
230                                        text,
231                                        config.tool_output_max_lines,
232                                    ),
233                                },
234                                other => other.clone(),
235                            })
236                            .collect();
237                        return AgentMessage::Llm(LlmMessage {
238                            message: Message::ToolResult {
239                                tool_call_id: tool_call_id.clone(),
240                                tool_name: tool_name.clone(),
241                                content: truncated_content,
242                                is_error: *is_error,
243                                timestamp: *timestamp,
244                            },
245                            turn_id: lm.turn_id.clone(),
246                            // Preserve Composition I identity + tags through
247                            // tool-output truncation. Identity is a property of
248                            // the node, not its body bytes; tags ride along.
249                            node_id: lm.node_id,
250                            parent_id: lm.parent_id,
251                            tags: lm.tags.clone(),
252                            provenance_hint: lm.provenance_hint.clone(),
253                        });
254                    }
255                }
256                m.clone()
257            })
258            .collect();
259        Some(CompactedSection {
260            range,
261            messages: truncated,
262        })
263    }
264
265    /// Basic implementation: generates per-turn one-liner summaries until
266    /// `max_summary_tokens` is exhausted. Remaining turns are dropped.
267    ///
268    /// More sophisticated strategies (e.g. LLM-based) should produce a holistic
269    /// summary of ALL turns within the budget rather than dropping turns.
270    ///
271    /// Summaries use `Message::User` role to maintain valid LLM message alternation
272    /// (user→assistant→user→...). A summary replaces a full turn sequence
273    /// (user + assistant + tool results) with a single user-role "[Summary]" message.
274    async fn keep_compacted(
275        &self,
276        record: &LoopRecord,
277        turn_map: &TurnMap,
278        config: &CompactionConfig,
279        is_most_recent: bool,
280    ) -> Option<CompactedSection> {
281        let total = turn_map.turn_count();
282        if total == 0 {
283            return None;
284        }
285
286        let (start, end) = if is_most_recent {
287            let first_end = (config.keep_first_turns as u32).min(total);
288            let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
289            if first_end >= recent_start {
290                return None; // No middle section
291            }
292            (first_end, recent_start.saturating_sub(1))
293        } else {
294            // Summarise the entire loop
295            (0, total.saturating_sub(1))
296        };
297
298        let range = TurnRange {
299            start_turn: start,
300            end_turn: end,
301        };
302        let msgs = turn_map.messages_for_range(&range, &record.messages);
303
304        // Generate one-liner summaries per assistant message
305        let mut summaries: Vec<AgentMessage> = Vec::new();
306        let mut token_budget = config.max_summary_tokens;
307
308        for msg in msgs {
309            if let AgentMessage::Llm(lm) = msg {
310                if let Message::Assistant { content, .. } = &lm.message {
311                    let text_parts: Vec<&str> = content
312                        .iter()
313                        .filter_map(|c| match c {
314                            Content::Text { text } if text.len() <= 200 => Some(text.as_str()),
315                            _ => None,
316                        })
317                        .collect();
318                    let tool_count = content
319                        .iter()
320                        .filter(|c| matches!(c, Content::ToolCall { .. }))
321                        .count();
322                    let summary = if !text_parts.is_empty() {
323                        text_parts.join(" ")
324                    } else if tool_count > 0 {
325                        format!("[Assistant used {} tool(s)]", tool_count)
326                    } else {
327                        "[Assistant response]".into()
328                    };
329                    let summary_text = format!("[Summary] {}", summary);
330                    let est_tokens = estimate_tokens(&summary_text);
331                    if est_tokens > token_budget {
332                        break; // Budget exhausted
333                    }
334                    token_budget -= est_tokens;
335                    summaries.push(AgentMessage::Llm(LlmMessage::new(Message::user(
336                        &summary_text,
337                    ))));
338                }
339            }
340        }
341
342        if summaries.is_empty() {
343            return None;
344        }
345        Some(CompactedSection {
346            range,
347            messages: summaries,
348        })
349    }
350}