phi_core/context/strategy.rs
1use super::compaction::*;
2use super::config::*;
3use super::token::*;
4use crate::types::*;
5use chrono::Utc;
6#[allow(unused_imports)]
7use std::sync::Arc;
8
9// ---------------------------------------------------------------------------
10// Compaction strategy
11// ---------------------------------------------------------------------------
12
13/// Strategy for compacting messages when context exceeds budget.
14///
15/// Implement this to customize what happens during compaction:
16/// - Index discarded content into a memory store before removal
17/// - Apply custom preservation rules (e.g., always keep decisions)
18/// - Emit metadata about what was compressed
19///
20/// See the [Custom Compaction](https://LazyBouy.github.io/phi-core/concepts/agent-loop.html#custom-compaction)
21/// docs for examples.
22/*
23RUST QUIRK: Traits as seams for extensibility (the Strategy pattern)
24
25CompactionStrategy is a classic "strategy pattern" expressed as a Rust trait.
26The agent loop calls `strategy.compact(messages, config)` — it doesn't know
27whether it's calling DefaultCompaction or a custom user-provided strategy.
28
29This is polymorphism without inheritance:
30 - In OOP: you'd subclass a BaseCompaction class
31 - In Rust: you implement a trait
32
33The trait object `Arc<dyn CompactionStrategy>` in AgentLoopConfig means:
34"store any type that implements CompactionStrategy, dispatched at runtime."
35
36Why `Send + Sync` bounds?
37 - The agent loop may run on any tokio thread pool thread
38 - The strategy is shared via Arc, so it must be Sync (safe to &-reference across threads)
39 - It must be Send (safe to move to another thread)
40 - Basically: thread-safe is required because tokio = multi-threaded by default
41
42The `messages: Vec<AgentMessage>` parameter takes ownership (not a borrow).
43This is intentional: compaction rewrites the list. Passing by value lets the
44implementation freely mutate, filter, and reconstruct without cloning.
45*/
46pub trait CompactionStrategy: Send + Sync {
47 /// Compact messages to fit within the token budget defined by `config`.
48 ///
49 /// Called before each LLM turn when `context_config` is set.
50 fn compact(
51 &self,
52 messages: Vec<AgentMessage>, // OWNED — taken by value so implementation can freely rewrite without cloning
53 config: &ContextConfig, // SETTINGS — token budget, keep_first/keep_recent counts, tool_output_max_lines
54 ) -> Vec<AgentMessage>;
55}
56
57/// Default 3-level compaction: truncate tool outputs → summarize turns → drop middle.
58///
59/// This is used automatically when no custom `CompactionStrategy` is set.
60/// You can also compose it inside a custom strategy — run your logic first,
61/// then delegate to `compact_messages()` for the actual reduction.
62pub struct DefaultCompaction;
63
64impl CompactionStrategy for DefaultCompaction {
65 fn compact(
66 &self,
67 messages: Vec<AgentMessage>, // OWNED — passed directly to compact_messages()
68 config: &ContextConfig, // SETTINGS — forwarded to compact_messages()
69 ) -> Vec<AgentMessage> {
70 super::compact_messages::compact_messages_with_counter(
71 messages,
72 config,
73 config.token_counter.as_ref(),
74 )
75 }
76}
77
78// ---------------------------------------------------------------------------
79// Block-based compaction strategy (non-destructive overlay model)
80// ---------------------------------------------------------------------------
81
82use crate::session::LoopRecord;
83
84/// Strategy for creating non-destructive `CompactionBlock` overlays.
85///
86/// Three methods produce the three sections of a `CompactionBlock`:
87/// - `keep_first`: turns kept verbatim from the start
88/// - `keep_recent`: recent turns with truncated tool outputs
89/// - `keep_compacted`: fully summarised section
90///
91/// The default `compact()` method assembles them. Override individual methods
92/// to customise specific sections (e.g. LLM-based summarisation for `keep_compacted`).
93pub trait BlockCompactionStrategy: Send + Sync {
94 /// Determine the keep_first section: turns kept verbatim from the start.
95 /// Only called for the most recent loop.
96 fn keep_first(
97 &self,
98 record: &LoopRecord,
99 turn_map: &TurnMap,
100 config: &CompactionConfig,
101 ) -> Option<TurnRange>;
102
103 /// Create the keep_recent section: recent turns with truncated tool outputs.
104 /// Only called for the most recent loop.
105 fn keep_recent(
106 &self,
107 record: &LoopRecord,
108 turn_map: &TurnMap,
109 config: &CompactionConfig,
110 ) -> Option<CompactedSection>;
111
112 /// Create the keep_compacted section: fully summarised turns.
113 /// For most recent loop: summarises the middle (between keep_first and keep_recent).
114 /// For older loops: summarises the entire loop.
115 ///
116 /// Implementations should aim to summarise ALL turns in the range within
117 /// `config.max_summary_tokens` — e.g. shorter per-turn summaries or an
118 /// LLM-generated holistic digest. The token budget is for the total output,
119 /// not a per-turn limit.
120 fn keep_compacted(
121 &self,
122 record: &LoopRecord,
123 turn_map: &TurnMap,
124 config: &CompactionConfig,
125 is_most_recent: bool,
126 ) -> Option<CompactedSection>;
127
128 /// Assemble a `CompactionBlock` from the three sections.
129 /// Default implementation calls the three methods above.
130 fn compact(
131 &self,
132 record: &LoopRecord,
133 config: &CompactionConfig,
134 is_most_recent: bool,
135 ) -> CompactionBlock {
136 let turn_map = TurnMap::from_messages(&record.messages);
137 CompactionBlock {
138 keep_first: if is_most_recent {
139 self.keep_first(record, &turn_map, config)
140 } else {
141 None
142 },
143 keep_recent: if is_most_recent {
144 self.keep_recent(record, &turn_map, config)
145 } else {
146 None
147 },
148 keep_compacted: self.keep_compacted(record, &turn_map, config, is_most_recent),
149 created_at: Utc::now(),
150 }
151 }
152}
153
154/// Default block-based compaction strategy.
155///
156/// Stateless — all parameters come from `CompactionConfig`.
157/// - `keep_first`: returns turn range `0..keep_first_turns`
158/// - `keep_compacted`: one-liner summaries of the middle section, bounded by `max_summary_tokens`
159/// - `keep_recent`: truncates tool outputs in the recent section to `tool_output_max_lines`
160pub struct DefaultBlockCompaction;
161
162impl BlockCompactionStrategy for DefaultBlockCompaction {
163 fn keep_first(
164 &self,
165 _record: &LoopRecord,
166 turn_map: &TurnMap,
167 config: &CompactionConfig,
168 ) -> Option<TurnRange> {
169 let total = turn_map.turn_count();
170 if total == 0 {
171 return None;
172 }
173 let end = (config.keep_first_turns as u32)
174 .min(total)
175 .saturating_sub(1);
176 Some(TurnRange {
177 start_turn: 0,
178 end_turn: end,
179 })
180 }
181
182 fn keep_recent(
183 &self,
184 record: &LoopRecord,
185 turn_map: &TurnMap,
186 config: &CompactionConfig,
187 ) -> Option<CompactedSection> {
188 let total = turn_map.turn_count();
189 if total == 0 {
190 return None;
191 }
192 let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
193 let range = TurnRange {
194 start_turn: recent_start,
195 end_turn: total - 1,
196 };
197 let msgs = turn_map.messages_for_range(&range, &record.messages);
198 // Truncate tool outputs in the recent section
199 let truncated: Vec<AgentMessage> = msgs
200 .iter()
201 .map(|m| {
202 if let AgentMessage::Llm(lm) = m {
203 if let Message::ToolResult {
204 tool_call_id,
205 tool_name,
206 content,
207 is_error,
208 timestamp,
209 } = &lm.message
210 {
211 let truncated_content: Vec<Content> = content
212 .iter()
213 .map(|c| match c {
214 Content::Text { text } => Content::Text {
215 text: super::compact_messages::truncate_text_head_tail(
216 text,
217 config.tool_output_max_lines,
218 ),
219 },
220 other => other.clone(),
221 })
222 .collect();
223 return AgentMessage::Llm(LlmMessage {
224 message: Message::ToolResult {
225 tool_call_id: tool_call_id.clone(),
226 tool_name: tool_name.clone(),
227 content: truncated_content,
228 is_error: *is_error,
229 timestamp: *timestamp,
230 },
231 turn_id: lm.turn_id.clone(),
232 // Preserve Composition I identity + tags through
233 // tool-output truncation. Identity is a property of
234 // the node, not its body bytes; tags ride along.
235 node_id: lm.node_id,
236 parent_id: lm.parent_id,
237 tags: lm.tags.clone(),
238 });
239 }
240 }
241 m.clone()
242 })
243 .collect();
244 Some(CompactedSection {
245 range,
246 messages: truncated,
247 })
248 }
249
250 /// Basic implementation: generates per-turn one-liner summaries until
251 /// `max_summary_tokens` is exhausted. Remaining turns are dropped.
252 ///
253 /// More sophisticated strategies (e.g. LLM-based) should produce a holistic
254 /// summary of ALL turns within the budget rather than dropping turns.
255 ///
256 /// Summaries use `Message::User` role to maintain valid LLM message alternation
257 /// (user→assistant→user→...). A summary replaces a full turn sequence
258 /// (user + assistant + tool results) with a single user-role "[Summary]" message.
259 fn keep_compacted(
260 &self,
261 record: &LoopRecord,
262 turn_map: &TurnMap,
263 config: &CompactionConfig,
264 is_most_recent: bool,
265 ) -> Option<CompactedSection> {
266 let total = turn_map.turn_count();
267 if total == 0 {
268 return None;
269 }
270
271 let (start, end) = if is_most_recent {
272 let first_end = (config.keep_first_turns as u32).min(total);
273 let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
274 if first_end >= recent_start {
275 return None; // No middle section
276 }
277 (first_end, recent_start.saturating_sub(1))
278 } else {
279 // Summarise the entire loop
280 (0, total.saturating_sub(1))
281 };
282
283 let range = TurnRange {
284 start_turn: start,
285 end_turn: end,
286 };
287 let msgs = turn_map.messages_for_range(&range, &record.messages);
288
289 // Generate one-liner summaries per assistant message
290 let mut summaries: Vec<AgentMessage> = Vec::new();
291 let mut token_budget = config.max_summary_tokens;
292
293 for msg in msgs {
294 if let AgentMessage::Llm(lm) = msg {
295 if let Message::Assistant { content, .. } = &lm.message {
296 let text_parts: Vec<&str> = content
297 .iter()
298 .filter_map(|c| match c {
299 Content::Text { text } if text.len() <= 200 => Some(text.as_str()),
300 _ => None,
301 })
302 .collect();
303 let tool_count = content
304 .iter()
305 .filter(|c| matches!(c, Content::ToolCall { .. }))
306 .count();
307 let summary = if !text_parts.is_empty() {
308 text_parts.join(" ")
309 } else if tool_count > 0 {
310 format!("[Assistant used {} tool(s)]", tool_count)
311 } else {
312 "[Assistant response]".into()
313 };
314 let summary_text = format!("[Summary] {}", summary);
315 let est_tokens = estimate_tokens(&summary_text);
316 if est_tokens > token_budget {
317 break; // Budget exhausted
318 }
319 token_budget -= est_tokens;
320 summaries.push(AgentMessage::Llm(LlmMessage::new(Message::user(
321 &summary_text,
322 ))));
323 }
324 }
325 }
326
327 if summaries.is_empty() {
328 return None;
329 }
330 Some(CompactedSection {
331 range,
332 messages: summaries,
333 })
334 }
335}