Skip to main content

phi_core/tools/
prun.rs

1//! Model-invocable tool for surgical context pruning (2-stream architecture).
2/*
3ARCHITECTURE: PrunTool — model-directed context pruning via deferred execution
4
5Unlike every other built-in tool (bash, read_file, write_file, edit_file, list_files,
6search), PrunTool does NOT perform its work inside `execute()`. It cannot — pruning
7mutates the agent's context window, which is owned by the agent loop, not the tool.
8
9Instead, `execute()` only enqueues a `PrunRequest` onto a shared `Arc<Mutex<Vec<_>>>`
10queue. Between turns, the agent loop drains this queue and applies the requested
11pruning to the in-run context stream. See `agent_loop/run.rs` lines 424-426 (drain)
12and the `apply_prun` function (around line 524) for the consumer side.
13
14Two-stream context model (see `concepts/compaction` docs):
15  `user_context`   — messages typed by the user; NEVER pruned (preserves intent)
16  `inrun_context`  — assistant / tool-result chatter; the tail end is what `prun()` trims
17
18Why deferred execution and not direct mutation?
19  1. Ownership — the tool has `&self`; mutating the agent's context would require
20     either threading `&mut AgentContext` through `ToolContext` (intrusive, breaks
21     concurrency for parallel tool execution) or a second `Arc<Mutex<AgentContext>>`
22     (deadlock risk because the loop already holds it).
23  2. Timing — pruning mid-turn while the LLM stream is open would invalidate the
24     content_index counters in `StreamEvent` deltas. Between-turn application is
25     the only safe window.
26  3. Auditing — the queued `PrunRequest` is part of the loop's event stream, so
27     session recorders see the pruning as a discrete event and can reconstruct the
28     full pre-prune context from the session log via `PrunRecord`.
29
30Two variants share one tool implementation (toggled by `PrunVariant`):
31  `prun(tokens)`              — silent removal; pruned content is gone from context
32  `prun_with_memo(tokens, m)` — removal + replacement with a summary string the LLM
33                                writes; useful when exploration had findings worth
34                                keeping in compressed form.
35
36Both variants are wired together in `BasicAgent::with_prun_tool()` so they share a
37single `prun_pending` queue — order of submissions across the two tools is preserved.
38*/
39
40use crate::types::*;
41use std::sync::{Arc, Mutex};
42
/// A pending prun request the LLM submitted via `prun` or `prun_with_memo`.
///
/// Lifecycle:
/// 1. `PrunTool::execute()` pushes one of these onto the shared `pending` queue.
/// 2. The agent loop drains the queue between turns (see `agent_loop/run.rs:424`).
/// 3. Each request is applied to `AgentContext.inrun_context` in submission order,
///    producing a `PrunRecord` event that the session recorder captures.
///
/// `tokens_to_remove` is a lower bound — the loop walks the tail of `inrun_context`
/// removing whole entries until at least this many tokens have been freed, so the
/// amount actually removed may exceed the request. User messages are never affected
/// (they live in the separate `user_context` stream).
#[derive(Debug, Clone)]
pub struct PrunRequest {
    /// Lower bound on tokens to remove from the tail of `inrun_context`. The loop
    /// rounds up to the nearest whole entry so a single message is never split.
    pub tokens_to_remove: usize,
    /// Optional summary inserted in place of pruned content. `Some` for the
    /// `prun_with_memo` variant; `None` for the silent `prun` variant.
    pub memo: Option<String>,
}
63
/// Structured metadata persisted in the `details` field of a prun `ToolResult`.
///
/// Captured by `SessionRecorder` so a session replay can reconstruct exactly what
/// was pruned and (if a memo was supplied) what replaced it. Crucially, the actual
/// pruned message contents live in the session log proper — `pruned_timestamps`
/// is the cross-reference key, not a copy of the content.
///
/// Derives `serde::Serialize` / `serde::Deserialize` so the record can round-trip
/// through a serialized form.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PrunRecord {
    /// Unix-millis timestamps of every message removed in this prun cycle. Keyed
    /// against `Message::*::timestamp` so the session log can re-link pruned content.
    pub pruned_timestamps: Vec<u64>,
    /// Actual token count freed (may exceed `PrunRequest.tokens_to_remove` because
    /// pruning operates on whole-message boundaries).
    pub tokens_removed: usize,
    /// The summary string inserted in place of pruned content, if this was a
    /// `prun_with_memo` invocation; `None` for a plain `prun`.
    pub memo: Option<String>,
}
82
/// Which flavour of prun this `PrunTool` instance exposes to the model.
///
/// The same `PrunTool` struct backs both variants — only `name()`, `label()`,
/// `description()`, `parameters_schema()`, and the memo-handling branch in
/// `execute()` differ. Two variants are exposed (rather than a single tool with an
/// optional memo) so the LLM sees them in `tools/list` as distinct affordances with
/// separate descriptions — easier for the model to pick the right one.
///
/// The enum is `Copy`, so it is matched by value wherever the tool switches on it.
#[derive(Debug, Clone, Copy)]
pub enum PrunVariant {
    /// `prun(tokens)` — silently remove the last N tokens of in-run context.
    Prun,
    /// `prun_with_memo(tokens, memo)` — remove and replace with an LLM-written summary.
    PrunWithMemo,
}
97
/// Model-invocable tool for surgical context pruning.
///
/// The `pending` queue is shared with the agent loop via `AgentLoopConfig.prun_pending`
/// (an `Arc<Mutex<Vec<PrunRequest>>>`). One `PrunTool` per variant; both variants
/// share the same `pending` queue so cross-variant ordering is preserved.
pub struct PrunTool {
    /*
    RUST QUIRK: `Arc<Mutex<Vec<PrunRequest>>>` — the canonical "shared mutable queue"

    Three layers, each with a purpose:
      `Vec<PrunRequest>`      — the queue itself; requests are appended in
                                submission order
      `Mutex<Vec<...>>`       — serialised access; only one thread mutates at a time
      `Arc<Mutex<...>>`       — shared ownership across the tool, the agent loop,
                                and (when parallel tool execution is on) sibling tools

    `Arc::clone()` increments a reference count; cheap. `mutex.lock()` blocks until
    exclusive access is acquired and returns an error if a previous holder panicked
    (poisoning). The queue is drained between turns by the agent loop.

    Python analogy: a `threading.Lock`-guarded `collections.deque` shared via a class
    attribute — except Rust forces the locking discipline at compile time.
    */
    /// Shared queue of not-yet-applied prun requests; `execute()` pushes onto it.
    pending: Arc<Mutex<Vec<PrunRequest>>>,
    /// Which of the two surface APIs this instance exposes; `name()`/`description()`
    /// switch on it.
    variant: PrunVariant,
}
124
125impl PrunTool {
126    /// Create a new `PrunTool` bound to a shared `pending` queue.
127    ///
128    /// Call once per variant (Prun + PrunWithMemo) passing the same `Arc<Mutex<_>>`
129    /// so both tools enqueue into the same drain. `BasicAgent::with_prun_tool()`
130    /// does this wiring automatically.
131    pub fn new(pending: Arc<Mutex<Vec<PrunRequest>>>, variant: PrunVariant) -> Self {
132        Self { pending, variant }
133    }
134}
135
136#[async_trait::async_trait]
137impl AgentTool for PrunTool {
138    fn name(&self) -> &str {
139        match self.variant {
140            PrunVariant::Prun => "prun",
141            PrunVariant::PrunWithMemo => "prun_with_memo",
142        }
143    }
144
145    fn label(&self) -> &str {
146        match self.variant {
147            PrunVariant::Prun => "Prun",
148            PrunVariant::PrunWithMemo => "Prun with Memo",
149        }
150    }
151
152    fn description(&self) -> &str {
153        match self.variant {
154            PrunVariant::Prun => "Surgically remove the last N tokens of model-generated (in-run) context. Use when exploration or tool results waste context length. Pruned content is preserved in session log.",
155            PrunVariant::PrunWithMemo => "Surgically remove the last N tokens of in-run context and replace with a summary memo. Use when exploration had findings worth remembering but full content is too verbose.",
156        }
157    }
158
159    fn parameters_schema(&self) -> serde_json::Value {
160        match self.variant {
161            PrunVariant::Prun => serde_json::json!({
162                "type": "object",
163                "properties": {
164                    "tokens": {"type": "integer", "description": "Tokens to remove from tail of in-run context"}
165                },
166                "required": ["tokens"]
167            }),
168            PrunVariant::PrunWithMemo => serde_json::json!({
169                "type": "object",
170                "properties": {
171                    "tokens": {"type": "integer", "description": "Tokens to remove from tail of in-run context"},
172                    "memo": {"type": "string", "description": "Summary to insert in place of pruned content"}
173                },
174                "required": ["tokens", "memo"]
175            }),
176        }
177    }
178
179    /*
180    DESIGN: execute() enqueues; it does not prune.
181
182    The function looks oddly small for a tool — that's intentional. Real pruning is
183    performed by the agent loop between turns (see file-level ARCHITECTURE block).
184    All `execute()` does is:
185      1. Validate input (`tokens > 0`, plus `memo` for the with-memo variant).
186      2. Push a `PrunRequest` onto the shared queue.
187      3. Return a placeholder `ToolResult` so the LLM sees the call was accepted.
188
189    `_ctx` is intentionally unused — there's no I/O, no cancellation budget to honour,
190    no streaming output. The synthetic ToolResult will be observed by the LLM as
191    "your prun request was recorded"; the actual pruning takes effect before the
192    next prompt is built, replacing those messages in the context the LLM sees next.
193    */
194    async fn execute(
195        &self,
196        params: serde_json::Value, // LLM INPUT — `{"tokens": N}` or `{"tokens": N, "memo": "..."}`
197        _ctx: ToolContext,         // SYSTEM ENV — unused; pruning is deferred to the agent loop
198    ) -> Result<ToolResult, ToolError> {
199        // Validate `tokens` — must be a positive integer. A missing or non-integer
200        // value would otherwise silently default to 0 and produce a no-op enqueue.
201        let tokens = params.get("tokens").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
202        if tokens == 0 {
203            return Err(ToolError::InvalidArgs("tokens must be > 0".to_string()));
204        }
205
206        // Extract the memo only for the with-memo variant. The bare `prun` variant
207        // ignores any memo field even if the LLM accidentally supplies one — this
208        // keeps the two tools' on-the-wire semantics strictly separate.
209        let memo = match self.variant {
210            PrunVariant::PrunWithMemo => params
211                .get("memo")
212                .and_then(|v| v.as_str())
213                .map(|s| s.to_string()),
214            PrunVariant::Prun => None,
215        };
216
217        // Enqueue. `.lock().unwrap()` panics on mutex poisoning, which would indicate
218        // a panic in a previous holder of the lock — a bug worth surfacing loudly.
219        // (Contrast with the steering-queue poison-tolerant lock in BasicAgent, where
220        // recoverable behaviour is preferred because hooks run user code; here the
221        // only writer is this tool plus the agent-loop drain, both internal.)
222        self.pending.lock().unwrap().push(PrunRequest {
223            tokens_to_remove: tokens,
224            memo,
225        });
226
227        // Synthetic acknowledgement message — the LLM sees this in the next turn's
228        // ToolResult. The actual pruning is invisible to the model except by the
229        // shorter context window it observes next turn.
230        Ok(ToolResult {
231            content: vec![Content::Text {
232                text: format!(
233                    "Prun request recorded: {} tokens will be removed before next turn.",
234                    tokens
235                ),
236            }],
237            details: serde_json::Value::Null,
238            child_loop_id: None,
239        })
240    }
241}