phi_core/tools/prun.rs
1//! Model-invocable tool for surgical context pruning (2-stream architecture).
2/*
3ARCHITECTURE: PrunTool — model-directed context pruning via deferred execution
4
5Unlike every other built-in tool (bash, read_file, write_file, edit_file, list_files,
6search), PrunTool does NOT perform its work inside `execute()`. It cannot — pruning
7mutates the agent's context window, which is owned by the agent loop, not the tool.
8
9Instead, `execute()` only enqueues a `PrunRequest` onto a shared `Arc<Mutex<Vec<_>>>`
10queue. Between turns, the agent loop drains this queue and applies the requested
11pruning to the in-run context stream. See `agent_loop/run.rs` lines 424-426 (drain)
12and the `apply_prun` function (around line 524) for the consumer side.
13
14Two-stream context model (see `concepts/compaction` docs):
15 `user_context` — messages typed by the user; NEVER pruned (preserves intent)
16 `inrun_context` — assistant / tool-result chatter; the tail end is what `prun()` trims
17
18Why deferred execution and not direct mutation?
19 1. Ownership — the tool has `&self`; mutating the agent's context would require
20 either threading `&mut AgentContext` through `ToolContext` (intrusive, breaks
21 concurrency for parallel tool execution) or a second `Arc<Mutex<AgentContext>>`
22 (deadlock risk because the loop already holds it).
23 2. Timing — pruning mid-turn while the LLM stream is open would invalidate the
24 content_index counters in `StreamEvent` deltas. Between-turn application is
25 the only safe window.
26 3. Auditing — the queued `PrunRequest` is part of the loop's event stream, so
27 session recorders see the pruning as a discrete event and can reconstruct the
28 full pre-prune context from the session log via `PrunRecord`.
29
30Two variants share one tool implementation (toggled by `PrunVariant`):
31 `prun(tokens)` — silent removal; pruned content is gone from context
32 `prun_with_memo(tokens, m)` — removal + replacement with a summary string the LLM
33 writes; useful when exploration had findings worth
34 keeping in compressed form.
35
36Both variants are wired together in `BasicAgent::with_prun_tool()` so they share a
37single `prun_pending` queue — order of submissions across the two tools is preserved.
38*/
39
40use crate::types::*;
41use std::sync::{Arc, Mutex};
42
/// A pending prun request the LLM submitted via `prun` or `prun_with_memo`.
///
/// Lifecycle:
/// 1. `PrunTool::execute()` pushes one of these onto the shared `pending` queue.
/// 2. The agent loop drains the queue between turns (see `agent_loop/run.rs:424`).
/// 3. Each request is applied to `AgentContext.inrun_context` in submission order,
///    producing a `PrunRecord` event that the session recorder captures.
///
/// `tokens_to_remove` is a *lower* bound, not an exact amount: the loop walks the
/// tail of `inrun_context` removing whole entries until at least this many tokens
/// have been freed, so the amount actually removed may be larger. User messages
/// are never affected (they live in the separate `user_context` stream).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrunRequest {
    /// Lower bound on tokens to remove from the tail of `inrun_context`. The loop
    /// rounds up to the nearest whole entry so a single message is never split.
    pub tokens_to_remove: usize,
    /// Optional summary inserted in place of pruned content. `Some` for the
    /// `prun_with_memo` variant; `None` for the silent `prun` variant.
    pub memo: Option<String>,
}
63
/// Structured metadata persisted in the `details` field of a prun `ToolResult`.
///
/// Describes the outcome of one applied prun: which messages were removed, how
/// many tokens that freed, and which memo (if any) took their place. It is meant
/// to be captured by `SessionRecorder` so a session replay can reconstruct what
/// was pruned. The record deliberately does NOT copy the pruned message bodies —
/// those stay in the session log proper, and `pruned_timestamps` is only the
/// cross-reference key used to find them again.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PrunRecord {
    /// Unix-millis timestamps of every message removed in this prun cycle. Keyed
    /// against `Message::*::timestamp` so the session log can re-link pruned content.
    pub pruned_timestamps: Vec<u64>,
    /// Actual token count freed. May exceed `PrunRequest.tokens_to_remove` because
    /// pruning operates on whole-message boundaries and the request is a lower bound.
    pub tokens_removed: usize,
    /// The summary string inserted in place of pruned content, if this was a
    /// `prun_with_memo` invocation; `None` for a silent `prun`.
    pub memo: Option<String>,
}
82
/// Which flavour of prun this `PrunTool` instance exposes to the model.
///
/// The same `PrunTool` struct backs both variants — only `name()`, `label()`,
/// `description()`, `parameters_schema()`, and the memo-handling branch in
/// `execute()` differ. Two variants are exposed (rather than a single tool with an
/// optional memo) so the LLM sees them in `tools/list` as distinct affordances with
/// separate descriptions — easier for the model to pick the right one.
///
/// The enum is a plain tag, so it is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrunVariant {
    /// `prun(tokens)` — silently remove the last N tokens of in-run context.
    Prun,
    /// `prun_with_memo(tokens, memo)` — remove and replace with an LLM-written summary.
    PrunWithMemo,
}
97
98/// Model-invocable tool for surgical context pruning.
99///
100/// The `pending` queue is shared with the agent loop via `AgentLoopConfig.prun_pending`
101/// (an `Arc<Mutex<Vec<PrunRequest>>>`). One `PrunTool` per variant; both variants
102/// share the same `pending` queue so cross-variant ordering is preserved.
103pub struct PrunTool {
104 /*
105 RUST QUIRK: `Arc<Mutex<Vec<PrunRequest>>>` — the canonical "shared mutable queue"
106
107 Three layers, each with a purpose:
108 `Vec<PrunRequest>` — the queue itself; FIFO of pending requests
109 `Mutex<Vec<...>>` — serialised access; only one thread mutates at a time
110 `Arc<Mutex<...>>` — shared ownership across the tool, the agent loop,
111 and (when parallel tool execution is on) sibling tools
112
113 `Arc::clone()` increments a reference count; cheap. `mutex.lock().unwrap()` blocks
114 until exclusive access is acquired. Drained between turns by the agent loop.
115
116 Python analogy: a `threading.Lock`-guarded `collections.deque` shared via a class
117 attribute — except Rust forces the locking discipline at compile time.
118 */
119 pending: Arc<Mutex<Vec<PrunRequest>>>,
120 /// Which of the two surface APIs this instance exposes; `name()`/`description()`
121 /// switch on it.
122 variant: PrunVariant,
123}
124
125impl PrunTool {
126 /// Create a new `PrunTool` bound to a shared `pending` queue.
127 ///
128 /// Call once per variant (Prun + PrunWithMemo) passing the same `Arc<Mutex<_>>`
129 /// so both tools enqueue into the same drain. `BasicAgent::with_prun_tool()`
130 /// does this wiring automatically.
131 pub fn new(pending: Arc<Mutex<Vec<PrunRequest>>>, variant: PrunVariant) -> Self {
132 Self { pending, variant }
133 }
134}
135
136#[async_trait::async_trait]
137impl AgentTool for PrunTool {
138 fn name(&self) -> &str {
139 match self.variant {
140 PrunVariant::Prun => "prun",
141 PrunVariant::PrunWithMemo => "prun_with_memo",
142 }
143 }
144
145 fn label(&self) -> &str {
146 match self.variant {
147 PrunVariant::Prun => "Prun",
148 PrunVariant::PrunWithMemo => "Prun with Memo",
149 }
150 }
151
152 fn description(&self) -> &str {
153 match self.variant {
154 PrunVariant::Prun => "Surgically remove the last N tokens of model-generated (in-run) context. Use when exploration or tool results waste context length. Pruned content is preserved in session log.",
155 PrunVariant::PrunWithMemo => "Surgically remove the last N tokens of in-run context and replace with a summary memo. Use when exploration had findings worth remembering but full content is too verbose.",
156 }
157 }
158
159 fn parameters_schema(&self) -> serde_json::Value {
160 match self.variant {
161 PrunVariant::Prun => serde_json::json!({
162 "type": "object",
163 "properties": {
164 "tokens": {"type": "integer", "description": "Tokens to remove from tail of in-run context"}
165 },
166 "required": ["tokens"]
167 }),
168 PrunVariant::PrunWithMemo => serde_json::json!({
169 "type": "object",
170 "properties": {
171 "tokens": {"type": "integer", "description": "Tokens to remove from tail of in-run context"},
172 "memo": {"type": "string", "description": "Summary to insert in place of pruned content"}
173 },
174 "required": ["tokens", "memo"]
175 }),
176 }
177 }
178
179 /*
180 DESIGN: execute() enqueues; it does not prune.
181
182 The function looks oddly small for a tool — that's intentional. Real pruning is
183 performed by the agent loop between turns (see file-level ARCHITECTURE block).
184 All `execute()` does is:
185 1. Validate input (`tokens > 0`, plus `memo` for the with-memo variant).
186 2. Push a `PrunRequest` onto the shared queue.
187 3. Return a placeholder `ToolResult` so the LLM sees the call was accepted.
188
189 `_ctx` is intentionally unused — there's no I/O, no cancellation budget to honour,
190 no streaming output. The synthetic ToolResult will be observed by the LLM as
191 "your prun request was recorded"; the actual pruning takes effect before the
192 next prompt is built, replacing those messages in the context the LLM sees next.
193 */
194 async fn execute(
195 &self,
196 params: serde_json::Value, // LLM INPUT — `{"tokens": N}` or `{"tokens": N, "memo": "..."}`
197 _ctx: ToolContext, // SYSTEM ENV — unused; pruning is deferred to the agent loop
198 ) -> Result<ToolResult, ToolError> {
199 // Validate `tokens` — must be a positive integer. A missing or non-integer
200 // value would otherwise silently default to 0 and produce a no-op enqueue.
201 let tokens = params.get("tokens").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
202 if tokens == 0 {
203 return Err(ToolError::InvalidArgs("tokens must be > 0".to_string()));
204 }
205
206 // Extract the memo only for the with-memo variant. The bare `prun` variant
207 // ignores any memo field even if the LLM accidentally supplies one — this
208 // keeps the two tools' on-the-wire semantics strictly separate.
209 let memo = match self.variant {
210 PrunVariant::PrunWithMemo => params
211 .get("memo")
212 .and_then(|v| v.as_str())
213 .map(|s| s.to_string()),
214 PrunVariant::Prun => None,
215 };
216
217 // Enqueue. `.lock().unwrap()` panics on mutex poisoning, which would indicate
218 // a panic in a previous holder of the lock — a bug worth surfacing loudly.
219 // (Contrast with the steering-queue poison-tolerant lock in BasicAgent, where
220 // recoverable behaviour is preferred because hooks run user code; here the
221 // only writer is this tool plus the agent-loop drain, both internal.)
222 self.pending.lock().unwrap().push(PrunRequest {
223 tokens_to_remove: tokens,
224 memo,
225 });
226
227 // Synthetic acknowledgement message — the LLM sees this in the next turn's
228 // ToolResult. The actual pruning is invisible to the model except by the
229 // shorter context window it observes next turn.
230 Ok(ToolResult {
231 content: vec![Content::Text {
232 text: format!(
233 "Prun request recorded: {} tokens will be removed before next turn.",
234 tokens
235 ),
236 }],
237 details: serde_json::Value::Null,
238 child_loop_id: None,
239 })
240 }
241}