phi_core/context/strategy.rs
1use super::compaction::*;
2use super::config::*;
3use super::token::*;
4use crate::types::*;
5use async_trait::async_trait;
6use chrono::Utc;
7#[allow(unused_imports)]
8use std::sync::Arc;
9
10// ---------------------------------------------------------------------------
11// Compaction strategy
12// ---------------------------------------------------------------------------
13
14/// Strategy for compacting messages when context exceeds budget.
15///
16/// Implement this to customize what happens during compaction:
17/// - Index discarded content into a memory store before removal
18/// - Apply custom preservation rules (e.g., always keep decisions)
19/// - Emit metadata about what was compressed
20///
21/// See the [Custom Compaction](https://LazyBouy.github.io/phi-core/concepts/agent-loop.html#custom-compaction)
22/// docs for examples.
23/*
24RUST QUIRK: Traits as seams for extensibility (the Strategy pattern)
25
26CompactionStrategy is a classic "strategy pattern" expressed as a Rust trait.
27The agent loop calls `strategy.compact(messages, config)` — it doesn't know
28whether it's calling DefaultCompaction or a custom user-provided strategy.
29
30This is polymorphism without inheritance:
31 - In OOP: you'd subclass a BaseCompaction class
32 - In Rust: you implement a trait
33
34The trait object `Arc<dyn CompactionStrategy>` in AgentLoopConfig means:
35"store any type that implements CompactionStrategy, dispatched at runtime."
36
Why `Send + Sync` bounds?
  - The agent loop may run on any thread of tokio's worker pool.
  - The strategy is shared via `Arc`, so it must be `Sync` (safe to reference from several threads at once).
  - It must also be `Send` (safe to hand over to another thread); `Arc<T>` is only `Send` when `T` is both `Send` and `Sync`.
  - In short: the strategy has to be thread-safe because tokio's default runtime is multi-threaded.
42
43The `messages: Vec<AgentMessage>` parameter takes ownership (not a borrow).
44This is intentional: compaction rewrites the list. Passing by value lets the
45implementation freely mutate, filter, and reconstruct without cloning.
46*/
47pub trait CompactionStrategy: Send + Sync {
48 /// Compact messages to fit within the token budget defined by `config`.
49 ///
50 /// Called before each LLM turn when `context_config` is set.
51 fn compact(
52 &self,
53 messages: Vec<AgentMessage>, // OWNED — taken by value so implementation can freely rewrite without cloning
54 config: &ContextConfig, // SETTINGS — token budget, keep_first/keep_recent counts, tool_output_max_lines
55 ) -> Vec<AgentMessage>;
56}
57
58/// Default 3-level compaction: truncate tool outputs → summarize turns → drop middle.
59///
60/// This is used automatically when no custom `CompactionStrategy` is set.
61/// You can also compose it inside a custom strategy — run your logic first,
62/// then delegate to `compact_messages()` for the actual reduction.
63pub struct DefaultCompaction;
64
65impl CompactionStrategy for DefaultCompaction {
66 fn compact(
67 &self,
68 messages: Vec<AgentMessage>, // OWNED — passed directly to compact_messages()
69 config: &ContextConfig, // SETTINGS — forwarded to compact_messages()
70 ) -> Vec<AgentMessage> {
71 super::compact_messages::compact_messages_with_counter(
72 messages,
73 config,
74 config.token_counter.as_ref(),
75 )
76 }
77}
78
79// ---------------------------------------------------------------------------
80// Block-based compaction strategy (non-destructive overlay model)
81// ---------------------------------------------------------------------------
82
83use crate::session::LoopRecord;
84
85/// Strategy for creating non-destructive `CompactionBlock` overlays.
86///
87/// Three methods produce the three sections of a `CompactionBlock`:
88/// - `keep_first`: turns kept verbatim from the start
89/// - `keep_recent`: recent turns with truncated tool outputs
90/// - `keep_compacted`: fully summarised section
91///
92/// The default `compact()` method assembles them. Override individual methods
93/// to customise specific sections (e.g. LLM-based summarisation for `keep_compacted`).
94///
95/// As of phi-core 0.9.0, all four methods are `async fn` (via `#[async_trait]`)
96/// so implementations can issue LLM calls inside `keep_compacted`/`keep_recent`
97/// without `block_in_place` workarounds. Sync implementations migrate by
98/// prepending `#[async_trait]` to the impl + `async` to each method —
99/// existing bodies need no changes if they don't `.await` anything.
100#[async_trait]
101pub trait BlockCompactionStrategy: Send + Sync {
102 /// Determine the keep_first section: turns kept verbatim from the start.
103 /// Only called for the most recent loop.
104 async fn keep_first(
105 &self,
106 record: &LoopRecord,
107 turn_map: &TurnMap,
108 config: &CompactionConfig,
109 ) -> Option<TurnRange>;
110
111 /// Create the keep_recent section: recent turns with truncated tool outputs.
112 /// Only called for the most recent loop.
113 async fn keep_recent(
114 &self,
115 record: &LoopRecord,
116 turn_map: &TurnMap,
117 config: &CompactionConfig,
118 ) -> Option<CompactedSection>;
119
120 /// Create the keep_compacted section: fully summarised turns.
121 /// For most recent loop: summarises the middle (between keep_first and keep_recent).
122 /// For older loops: summarises the entire loop.
123 ///
124 /// Implementations should aim to summarise ALL turns in the range within
125 /// `config.max_summary_tokens` — e.g. shorter per-turn summaries or an
126 /// LLM-generated holistic digest. The token budget is for the total output,
127 /// not a per-turn limit.
128 async fn keep_compacted(
129 &self,
130 record: &LoopRecord,
131 turn_map: &TurnMap,
132 config: &CompactionConfig,
133 is_most_recent: bool,
134 ) -> Option<CompactedSection>;
135
136 /// Assemble a `CompactionBlock` from the three sections.
137 /// Default implementation calls the three methods above.
138 async fn compact(
139 &self,
140 record: &LoopRecord,
141 config: &CompactionConfig,
142 is_most_recent: bool,
143 ) -> CompactionBlock {
144 let turn_map = TurnMap::from_messages(&record.messages);
145 let keep_first = if is_most_recent {
146 self.keep_first(record, &turn_map, config).await
147 } else {
148 None
149 };
150 let keep_recent = if is_most_recent {
151 self.keep_recent(record, &turn_map, config).await
152 } else {
153 None
154 };
155 let keep_compacted = self
156 .keep_compacted(record, &turn_map, config, is_most_recent)
157 .await;
158 CompactionBlock {
159 keep_first,
160 keep_recent,
161 keep_compacted,
162 created_at: Utc::now(),
163 }
164 }
165}
166
167/// Default block-based compaction strategy.
168///
169/// Stateless — all parameters come from `CompactionConfig`.
170/// - `keep_first`: returns turn range `0..keep_first_turns`
171/// - `keep_compacted`: one-liner summaries of the middle section, bounded by `max_summary_tokens`
172/// - `keep_recent`: truncates tool outputs in the recent section to `tool_output_max_lines`
173pub struct DefaultBlockCompaction;
174
175#[async_trait]
176impl BlockCompactionStrategy for DefaultBlockCompaction {
177 async fn keep_first(
178 &self,
179 _record: &LoopRecord,
180 turn_map: &TurnMap,
181 config: &CompactionConfig,
182 ) -> Option<TurnRange> {
183 let total = turn_map.turn_count();
184 if total == 0 {
185 return None;
186 }
187 let end = (config.keep_first_turns as u32)
188 .min(total)
189 .saturating_sub(1);
190 Some(TurnRange {
191 start_turn: 0,
192 end_turn: end,
193 })
194 }
195
196 async fn keep_recent(
197 &self,
198 record: &LoopRecord,
199 turn_map: &TurnMap,
200 config: &CompactionConfig,
201 ) -> Option<CompactedSection> {
202 let total = turn_map.turn_count();
203 if total == 0 {
204 return None;
205 }
206 let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
207 let range = TurnRange {
208 start_turn: recent_start,
209 end_turn: total - 1,
210 };
211 let msgs = turn_map.messages_for_range(&range, &record.messages);
212 // Truncate tool outputs in the recent section
213 let truncated: Vec<AgentMessage> = msgs
214 .iter()
215 .map(|m| {
216 if let AgentMessage::Llm(lm) = m {
217 if let Message::ToolResult {
218 tool_call_id,
219 tool_name,
220 content,
221 is_error,
222 timestamp,
223 } = &lm.message
224 {
225 let truncated_content: Vec<Content> = content
226 .iter()
227 .map(|c| match c {
228 Content::Text { text } => Content::Text {
229 text: super::compact_messages::truncate_text_head_tail(
230 text,
231 config.tool_output_max_lines,
232 ),
233 },
234 other => other.clone(),
235 })
236 .collect();
237 return AgentMessage::Llm(LlmMessage {
238 message: Message::ToolResult {
239 tool_call_id: tool_call_id.clone(),
240 tool_name: tool_name.clone(),
241 content: truncated_content,
242 is_error: *is_error,
243 timestamp: *timestamp,
244 },
245 turn_id: lm.turn_id.clone(),
246 // Preserve Composition I identity + tags through
247 // tool-output truncation. Identity is a property of
248 // the node, not its body bytes; tags ride along.
249 node_id: lm.node_id,
250 parent_id: lm.parent_id,
251 tags: lm.tags.clone(),
252 provenance_hint: lm.provenance_hint.clone(),
253 });
254 }
255 }
256 m.clone()
257 })
258 .collect();
259 Some(CompactedSection {
260 range,
261 messages: truncated,
262 })
263 }
264
265 /// Basic implementation: generates per-turn one-liner summaries until
266 /// `max_summary_tokens` is exhausted. Remaining turns are dropped.
267 ///
268 /// More sophisticated strategies (e.g. LLM-based) should produce a holistic
269 /// summary of ALL turns within the budget rather than dropping turns.
270 ///
271 /// Summaries use `Message::User` role to maintain valid LLM message alternation
272 /// (user→assistant→user→...). A summary replaces a full turn sequence
273 /// (user + assistant + tool results) with a single user-role "[Summary]" message.
274 async fn keep_compacted(
275 &self,
276 record: &LoopRecord,
277 turn_map: &TurnMap,
278 config: &CompactionConfig,
279 is_most_recent: bool,
280 ) -> Option<CompactedSection> {
281 let total = turn_map.turn_count();
282 if total == 0 {
283 return None;
284 }
285
286 let (start, end) = if is_most_recent {
287 let first_end = (config.keep_first_turns as u32).min(total);
288 let recent_start = total.saturating_sub(config.keep_recent_turns as u32);
289 if first_end >= recent_start {
290 return None; // No middle section
291 }
292 (first_end, recent_start.saturating_sub(1))
293 } else {
294 // Summarise the entire loop
295 (0, total.saturating_sub(1))
296 };
297
298 let range = TurnRange {
299 start_turn: start,
300 end_turn: end,
301 };
302 let msgs = turn_map.messages_for_range(&range, &record.messages);
303
304 // Generate one-liner summaries per assistant message
305 let mut summaries: Vec<AgentMessage> = Vec::new();
306 let mut token_budget = config.max_summary_tokens;
307
308 for msg in msgs {
309 if let AgentMessage::Llm(lm) = msg {
310 if let Message::Assistant { content, .. } = &lm.message {
311 let text_parts: Vec<&str> = content
312 .iter()
313 .filter_map(|c| match c {
314 Content::Text { text } if text.len() <= 200 => Some(text.as_str()),
315 _ => None,
316 })
317 .collect();
318 let tool_count = content
319 .iter()
320 .filter(|c| matches!(c, Content::ToolCall { .. }))
321 .count();
322 let summary = if !text_parts.is_empty() {
323 text_parts.join(" ")
324 } else if tool_count > 0 {
325 format!("[Assistant used {} tool(s)]", tool_count)
326 } else {
327 "[Assistant response]".into()
328 };
329 let summary_text = format!("[Summary] {}", summary);
330 let est_tokens = estimate_tokens(&summary_text);
331 if est_tokens > token_budget {
332 break; // Budget exhausted
333 }
334 token_budget -= est_tokens;
335 summaries.push(AgentMessage::Llm(LlmMessage::new(Message::user(
336 &summary_text,
337 ))));
338 }
339 }
340 }
341
342 if summaries.is_empty() {
343 return None;
344 }
345 Some(CompactedSection {
346 range,
347 messages: summaries,
348 })
349 }
350}