Skip to main content

phi_core/context/
strategy.rs

1use super::compaction::*;
2use super::config::*;
3use super::token::*;
4use crate::types::*;
5use chrono::Utc;
6#[allow(unused_imports)]
7use std::sync::Arc;
8
9// ---------------------------------------------------------------------------
10// Compaction strategy
11// ---------------------------------------------------------------------------
12
13/// Strategy for compacting messages when context exceeds budget.
14///
15/// Implement this to customize what happens during compaction:
16/// - Index discarded content into a memory store before removal
17/// - Apply custom preservation rules (e.g., always keep decisions)
18/// - Emit metadata about what was compressed
19///
20/// See the [Custom Compaction](https://LazyBouy.github.io/phi-core/concepts/agent-loop.html#custom-compaction)
21/// docs for examples.
22/*
23RUST QUIRK: Traits as seams for extensibility (the Strategy pattern)
24
25CompactionStrategy is a classic "strategy pattern" expressed as a Rust trait.
26The agent loop calls `strategy.compact(messages, config)` — it doesn't know
27whether it's calling DefaultCompaction or a custom user-provided strategy.
28
29This is polymorphism without inheritance:
30  - In OOP: you'd subclass a BaseCompaction class
31  - In Rust: you implement a trait
32
33The trait object `Arc<dyn CompactionStrategy>` in AgentLoopConfig means:
34"store any type that implements CompactionStrategy, dispatched at runtime."
35
36Why `Send + Sync` bounds?
37  - The agent loop may run on any tokio thread pool thread
38  - The strategy is shared via Arc, so it must be Sync (safe to &-reference across threads)
39  - It must be Send (safe to move to another thread)
40  - Basically: thread-safe is required because tokio = multi-threaded by default
41
42The `messages: Vec<AgentMessage>` parameter takes ownership (not a borrow).
43This is intentional: compaction rewrites the list. Passing by value lets the
44implementation freely mutate, filter, and reconstruct without cloning.
45*/
// `Send + Sync` supertraits: every implementor must be usable from, and
// shareable between, multiple threads.
pub trait CompactionStrategy: Send + Sync {
    /// Compact messages to fit within the token budget defined by `config`.
    ///
    /// Called before each LLM turn when `context_config` is set.
    ///
    /// `messages` is taken by value and the compacted list is handed back as
    /// the return value, so an implementation may drop or rewrite entries in
    /// place instead of cloning the input first.
    fn compact(
        &self,
        messages: Vec<AgentMessage>, // OWNED — taken by value so implementation can freely rewrite without cloning
        config: &ContextConfig, // SETTINGS — token budget, keep_first/keep_recent counts, tool_output_max_lines
    ) -> Vec<AgentMessage>;
}
56
57/// Default 3-level compaction: truncate tool outputs → summarize turns → drop middle.
58///
59/// This is used automatically when no custom `CompactionStrategy` is set.
60/// You can also compose it inside a custom strategy — run your logic first,
61/// then delegate to `compact_messages()` for the actual reduction.
62pub struct DefaultCompaction;
63
64impl CompactionStrategy for DefaultCompaction {
65    fn compact(
66        &self,
67        messages: Vec<AgentMessage>, // OWNED — passed directly to compact_messages()
68        config: &ContextConfig,      // SETTINGS — forwarded to compact_messages()
69    ) -> Vec<AgentMessage> {
70        super::compact_messages::compact_messages_with_counter(
71            messages,
72            config,
73            config.token_counter.as_ref(),
74        )
75    }
76}
77
78// ---------------------------------------------------------------------------
79// Block-based compaction strategy (non-destructive overlay model)
80// ---------------------------------------------------------------------------
81
82use crate::session::LoopRecord;
83
84/// Strategy for creating non-destructive `CompactionBlock` overlays.
85///
86/// Three methods produce the three sections of a `CompactionBlock`:
87/// - `keep_first`: turns kept verbatim from the start
88/// - `keep_recent`: recent turns with truncated tool outputs
89/// - `keep_compacted`: fully summarised section
90///
91/// The default `compact()` method assembles them. Override individual methods
92/// to customise specific sections (e.g. LLM-based summarisation for `keep_compacted`).
93pub trait BlockCompactionStrategy: Send + Sync {
94    /// Determine the keep_first section: turns kept verbatim from the start.
95    /// Only called for the most recent loop.
96    fn keep_first(
97        &self,
98        record: &LoopRecord,
99        turn_map: &TurnMap,
100        config: &CompactionConfig,
101    ) -> Option<TurnRange>;
102
103    /// Create the keep_recent section: recent turns with truncated tool outputs.
104    /// Only called for the most recent loop.
105    fn keep_recent(
106        &self,
107        record: &LoopRecord,
108        turn_map: &TurnMap,
109        config: &CompactionConfig,
110    ) -> Option<CompactedSection>;
111
112    /// Create the keep_compacted section: fully summarised turns.
113    /// For most recent loop: summarises the middle (between keep_first and keep_recent).
114    /// For older loops: summarises the entire loop.
115    ///
116    /// Implementations should aim to summarise ALL turns in the range within
117    /// `config.max_summary_tokens` — e.g. shorter per-turn summaries or an
118    /// LLM-generated holistic digest. The token budget is for the total output,
119    /// not a per-turn limit.
120    fn keep_compacted(
121        &self,
122        record: &LoopRecord,
123        turn_map: &TurnMap,
124        config: &CompactionConfig,
125        is_most_recent: bool,
126    ) -> Option<CompactedSection>;
127
128    /// Assemble a `CompactionBlock` from the three sections.
129    /// Default implementation calls the three methods above.
130    fn compact(
131        &self,
132        record: &LoopRecord,
133        config: &CompactionConfig,
134        is_most_recent: bool,
135    ) -> CompactionBlock {
136        let turn_map = TurnMap::from_messages(&record.messages);
137        CompactionBlock {
138            keep_first: if is_most_recent {
139                self.keep_first(record, &turn_map, config)
140            } else {
141                None
142            },
143            keep_recent: if is_most_recent {
144                self.keep_recent(record, &turn_map, config)
145            } else {
146                None
147            },
148            keep_compacted: self.keep_compacted(record, &turn_map, config, is_most_recent),
149            created_at: Utc::now(),
150        }
151    }
152}
153
154/// Default block-based compaction strategy.
155///
156/// Stateless — all parameters come from `CompactionConfig`.
157/// - `keep_first`: returns turn range `0..keep_first_turns`
158/// - `keep_compacted`: one-liner summaries of the middle section, bounded by `max_summary_tokens`
159/// - `keep_recent`: truncates tool outputs in the recent section to `tool_output_max_lines`
160pub struct DefaultBlockCompaction;
161
162impl BlockCompactionStrategy for DefaultBlockCompaction {
163    fn keep_first(
164        &self,
165        _record: &LoopRecord,
166        turn_map: &TurnMap,
167        config: &CompactionConfig,
168    ) -> Option<TurnRange> {
169        let total = turn_map.turn_count();
170        if total == 0 {
171            return None;
172        }
173        let end = (config.keep_first_turns as u32)
174            .min(total)
175            .saturating_sub(1);
176        Some(TurnRange {
177            start_turn: 0,
178            end_turn: end,
179        })
180    }
181
182    fn keep_recent(
183        &self,
184        record: &LoopRecord,
185        turn_map: &TurnMap,
186        config: &CompactionConfig,
187    ) -> Option<CompactedSection> {
188        let total = turn_map.turn_count();
189        if total == 0 {
190            return None;
191        }
192        let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
193        let range = TurnRange {
194            start_turn: recent_start,
195            end_turn: total - 1,
196        };
197        let msgs = turn_map.messages_for_range(&range, &record.messages);
198        // Truncate tool outputs in the recent section
199        let truncated: Vec<AgentMessage> = msgs
200            .iter()
201            .map(|m| {
202                if let AgentMessage::Llm(lm) = m {
203                    if let Message::ToolResult {
204                        tool_call_id,
205                        tool_name,
206                        content,
207                        is_error,
208                        timestamp,
209                    } = &lm.message
210                    {
211                        let truncated_content: Vec<Content> = content
212                            .iter()
213                            .map(|c| match c {
214                                Content::Text { text } => Content::Text {
215                                    text: super::compact_messages::truncate_text_head_tail(
216                                        text,
217                                        config.tool_output_max_lines,
218                                    ),
219                                },
220                                other => other.clone(),
221                            })
222                            .collect();
223                        return AgentMessage::Llm(LlmMessage {
224                            message: Message::ToolResult {
225                                tool_call_id: tool_call_id.clone(),
226                                tool_name: tool_name.clone(),
227                                content: truncated_content,
228                                is_error: *is_error,
229                                timestamp: *timestamp,
230                            },
231                            turn_id: lm.turn_id.clone(),
232                            // Preserve Composition I identity + tags through
233                            // tool-output truncation. Identity is a property of
234                            // the node, not its body bytes; tags ride along.
235                            node_id: lm.node_id,
236                            parent_id: lm.parent_id,
237                            tags: lm.tags.clone(),
238                        });
239                    }
240                }
241                m.clone()
242            })
243            .collect();
244        Some(CompactedSection {
245            range,
246            messages: truncated,
247        })
248    }
249
250    /// Basic implementation: generates per-turn one-liner summaries until
251    /// `max_summary_tokens` is exhausted. Remaining turns are dropped.
252    ///
253    /// More sophisticated strategies (e.g. LLM-based) should produce a holistic
254    /// summary of ALL turns within the budget rather than dropping turns.
255    ///
256    /// Summaries use `Message::User` role to maintain valid LLM message alternation
257    /// (user→assistant→user→...). A summary replaces a full turn sequence
258    /// (user + assistant + tool results) with a single user-role "[Summary]" message.
259    fn keep_compacted(
260        &self,
261        record: &LoopRecord,
262        turn_map: &TurnMap,
263        config: &CompactionConfig,
264        is_most_recent: bool,
265    ) -> Option<CompactedSection> {
266        let total = turn_map.turn_count();
267        if total == 0 {
268            return None;
269        }
270
271        let (start, end) = if is_most_recent {
272            let first_end = (config.keep_first_turns as u32).min(total);
273            let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
274            if first_end >= recent_start {
275                return None; // No middle section
276            }
277            (first_end, recent_start.saturating_sub(1))
278        } else {
279            // Summarise the entire loop
280            (0, total.saturating_sub(1))
281        };
282
283        let range = TurnRange {
284            start_turn: start,
285            end_turn: end,
286        };
287        let msgs = turn_map.messages_for_range(&range, &record.messages);
288
289        // Generate one-liner summaries per assistant message
290        let mut summaries: Vec<AgentMessage> = Vec::new();
291        let mut token_budget = config.max_summary_tokens;
292
293        for msg in msgs {
294            if let AgentMessage::Llm(lm) = msg {
295                if let Message::Assistant { content, .. } = &lm.message {
296                    let text_parts: Vec<&str> = content
297                        .iter()
298                        .filter_map(|c| match c {
299                            Content::Text { text } if text.len() <= 200 => Some(text.as_str()),
300                            _ => None,
301                        })
302                        .collect();
303                    let tool_count = content
304                        .iter()
305                        .filter(|c| matches!(c, Content::ToolCall { .. }))
306                        .count();
307                    let summary = if !text_parts.is_empty() {
308                        text_parts.join(" ")
309                    } else if tool_count > 0 {
310                        format!("[Assistant used {} tool(s)]", tool_count)
311                    } else {
312                        "[Assistant response]".into()
313                    };
314                    let summary_text = format!("[Summary] {}", summary);
315                    let est_tokens = estimate_tokens(&summary_text);
316                    if est_tokens > token_budget {
317                        break; // Budget exhausted
318                    }
319                    token_budget -= est_tokens;
320                    summaries.push(AgentMessage::Llm(LlmMessage::new(Message::user(
321                        &summary_text,
322                    ))));
323                }
324            }
325        }
326
327        if summaries.is_empty() {
328            return None;
329        }
330        Some(CompactedSection {
331            range,
332            messages: summaries,
333        })
334    }
335}