deepstrike_core/mm/handle.rs
1//! Primitive P3: the resource handle table + paging (context as address space).
2//!
3//! M0 scaffold (see `.local-docs/specs/agent-os-three-primitives.md`): types + a pure
4//! eviction-plan stub only — **no wiring, no behavior change**. A later milestone (M3, which is the
5//! compression optimization) builds a [`HandleTable`] over the context manager and replaces the
6//! scattered compactors in [`crate::context::compression`] with a single pure [`plan_eviction`].
7//!
8//! Concept overlap this primitive collapses: the 5-layer compression pyramid (5 compactors each
9//! deciding its own trigger) becomes one [`EvictionPlan`] of uniform [`EvictionOp`]s; page-out (④)
10//! and long-term memory residency (⑦) ride on [`Residency`].
11
12use compact_str::CompactString;
13use serde::{Deserialize, Serialize};
14
15use crate::context::pressure::PressureAction;
16use crate::mm::MemoryTierHint;
17
/// Opaque handle id. M3 assigns these as tool results / knowledge / memory pages enter context.
///
/// [`HandleTable`] treats the id as the identity of a [`Handle`]: inserting a handle whose id is
/// already present overwrites the existing entry. [`EvictionOp::Spool`] carries one as its target.
pub type HandleId = u32;
20
/// What a handle refers to.
///
/// A payload-free `Copy` tag. Through `rename_all = "snake_case"` each variant is serialized
/// under its snake_case name (e.g. `tool_result`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum HandleKind {
    /// A tool result occupying working context.
    ToolResult,
    /// A working-memory page (compressible / pageable).
    MemoryPage,
    /// A knowledge entry paged in from long-term storage.
    KnowledgeEntry,
    /// A large result spooled to disk with a preview left in context (Layer 1).
    SpoolFile,
    /// A sub-agent join result occupying context.
    SubAgentJoin,
}
36
/// Where a handle's content currently lives. Page-in/page-out are transitions on this.
///
/// Only [`Residency::Resident`] counts against the token budget (see
/// [`Residency::occupies_context`]); every other variant is treated as having left working
/// context. Variants are serialized under their snake_case names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Residency {
    /// Full content present in working context.
    Resident,
    /// Content written to disk; a preview reference remains (Layer 1 spool).
    SpooledOut {
        /// Reference to the spooled content.
        /// NOTE(review): the reference format is not defined in this file (the unit test uses
        /// `"disk://x"`) — confirm against the SDK component that writes the spool.
        r: String,
    },
    /// Content archived to long-term storage at the given tier (page-out).
    PagedOut {
        /// Long-term storage tier the content was archived to.
        tier: MemoryTierHint,
    },
    /// Original kept locally but projected out of the rendered view (Layer 4 read-time projection).
    Collapsed,
}
50
51impl Residency {
52 pub fn label(&self) -> &'static str {
53 match self {
54 Self::Resident => "resident",
55 Self::SpooledOut { .. } => "spooled_out",
56 Self::PagedOut { .. } => "paged_out",
57 Self::Collapsed => "collapsed",
58 }
59 }
60
61 /// Whether the handle's full content currently counts against the token budget.
62 pub fn occupies_context(&self) -> bool {
63 matches!(self, Self::Resident)
64 }
65}
66
/// One addressable resource the agent holds.
///
/// Serializable; `source` is left out of the serialized form when it is `None` and defaults to
/// `None` when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Handle {
    /// Identity of the handle within a [`HandleTable`] (inserting the same id again replaces it).
    pub id: HandleId,
    /// What kind of resource this handle refers to.
    pub kind: HandleKind,
    /// Where the handle's content currently lives.
    pub residency: Residency,
    /// Token cost of the resident form (used by the eviction planner).
    pub tokens: u32,
    /// Link back to the source object in working context — for [`HandleKind::ToolResult`] this is
    /// the tool `call_id`, letting the renderer project a handle's residency onto its message
    /// (read-time projection) without mutating the stored message. `None` for handles with no
    /// in-context anchor.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<CompactString>,
}
82
83impl Handle {
84 pub fn resident(id: HandleId, kind: HandleKind, tokens: u32) -> Self {
85 Self { id, kind, residency: Residency::Resident, tokens, source: None }
86 }
87
88 /// A resident handle anchored to a source object (e.g. a tool `call_id`).
89 pub fn resident_for(
90 id: HandleId,
91 kind: HandleKind,
92 tokens: u32,
93 source: impl Into<CompactString>,
94 ) -> Self {
95 Self { id, kind, residency: Residency::Resident, tokens, source: Some(source.into()) }
96 }
97}
98
/// Per-task handle table. M3 makes the context manager's partitions a view over this.
///
/// Handles live in a `Vec` in insertion order and are looked up by linear scan. The vector is
/// private; the default value is an empty table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct HandleTable {
    // Insertion-ordered storage; at most one entry per `Handle::id` when filled via `insert`.
    handles: Vec<Handle>,
}
104
105impl HandleTable {
106 pub fn new() -> Self {
107 Self::default()
108 }
109
110 pub fn insert(&mut self, handle: Handle) {
111 if let Some(existing) = self.handles.iter_mut().find(|h| h.id == handle.id) {
112 *existing = handle;
113 } else {
114 self.handles.push(handle);
115 }
116 }
117
118 pub fn get(&self, id: HandleId) -> Option<&Handle> {
119 self.handles.iter().find(|h| h.id == id)
120 }
121
122 pub fn get_mut(&mut self, id: HandleId) -> Option<&mut Handle> {
123 self.handles.iter_mut().find(|h| h.id == id)
124 }
125
126 pub fn all(&self) -> &[Handle] {
127 &self.handles
128 }
129
130 pub fn all_mut(&mut self) -> &mut [Handle] {
131 &mut self.handles
132 }
133
134 /// Retain only the handles for which `keep` returns true; drop the rest. The GC primitive the
135 /// context manager uses to evict handles whose backing message has left working context
136 /// (archived by compression / dropped on renewal) — bounding the table to the working set
137 /// instead of growing with total session length.
138 pub fn retain(&mut self, keep: impl FnMut(&Handle) -> bool) {
139 self.handles.retain(keep);
140 }
141
142 /// Residency of the handle anchored to `source` (e.g. a tool `call_id`), if any.
143 /// The renderer uses this to project a tool result without touching the stored message.
144 pub fn residency_for_source(&self, source: &str) -> Option<&Residency> {
145 self.handles
146 .iter()
147 .find(|h| h.source.as_deref() == Some(source))
148 .map(|h| &h.residency)
149 }
150
151 /// Tool-result handles in insertion (recency) order — oldest first. Used by the residency
152 /// planner to decide which older results to project out under context pressure.
153 pub fn tool_result_handles_mut(&mut self) -> impl Iterator<Item = &mut Handle> {
154 self.handles
155 .iter_mut()
156 .filter(|h| matches!(h.kind, HandleKind::ToolResult))
157 }
158
159 /// Sum of tokens for handles still occupying working context.
160 pub fn resident_tokens(&self) -> u32 {
161 self.handles
162 .iter()
163 .filter(|h| h.residency.occupies_context())
164 .map(|h| h.tokens)
165 .sum()
166 }
167
168 /// Sum of tokens for handles that have left working context (`Collapsed` / `SpooledOut` /
169 /// `PagedOut`). Their anchored messages still sit in `partitions` at full weight (collapse is
170 /// non-destructive), so this is exactly the over-count that the *estimate* rho path must
171 /// discount to become paging-aware — see [`crate::context::manager::ContextManager::effective_rho`].
172 pub fn non_resident_tokens(&self) -> u32 {
173 self.handles
174 .iter()
175 .filter(|h| !h.residency.occupies_context())
176 .map(|h| h.tokens)
177 .sum()
178 }
179}
180
/// One ordered eviction action in an [`EvictionPlan`]. Maps the pressure pyramid onto explicit
/// ops the planner emits directly (the old `Pressure(PressureAction)` umbrella is deleted), each
/// annotated with cache-aware metadata via [`EvictionOp::invalidates_prefix_at`].
///
/// P1-6 (async LLM semantic summary) is **not** a distinct op here: every archiving op already
/// emits the drained messages as `archived` on the `Compressed` observation, and the SDK upgrades
/// that summary out-of-band (LLM call = SDK I/O, a kernel non-goal), writing back a second
/// `compressed` event. A separate in-kernel `Summarize` op would be a never-produced dead variant.
///
/// **Layer boundary vs [`crate::context::pressure::PressureAction`] (do not collapse the two):**
/// `EvictionOp` is the *planner-op* vocabulary — what `plan_eviction` decides to do, carrying the
/// per-op payload (`target_tokens` / `per_msg_ratio` / `preserve_turns`). `PressureAction` is the
/// *pressure-level* vocabulary owned by the pressure subsystem: it is what `PressureMonitor::recommend`
/// and `ContextManager::should_compress` return, the `Ord`-keyed cascade selector inside the
/// compression pipeline, and the canonical wire label. They map ~1:1 by layer but are not redundant —
/// `Spool` / `TimeDecayMicro` don't sit on the linear pressure cascade, and `PressureAction` carries no
/// per-op data. The one bridge is `execute_eviction_op`, which is the intended seam, not duplication.
#[derive(Debug, Clone)]
pub enum EvictionOp {
    /// Layer 1: spool a large handle to disk, keep a preview reference in context.
    /// The payload is the id of the handle to spool.
    Spool(HandleId),
    /// Layer 2: cap oversized messages at a per-message token limit (in-place rewrite).
    Snip {
        /// Ratio from which the per-message cap is derived.
        /// NOTE(review): presumably a fraction of the context budget — the base quantity is not
        /// visible in this file; confirm against the executor.
        per_msg_ratio: f64,
    },
    /// Layer 3: idle/time-decay micro-compact — excerpt large tool results to placeholders.
    /// Independent of rho; stamps `last_compact_ms` and uses the non-time compress path.
    TimeDecayMicro,
    /// Layer 4: collapse (read-time projection) — drop oldest messages until within target.
    /// Now a distinct op (no longer bundled under `Pressure`), so the planner can annotate it
    /// with cache-aware metadata and order it explicitly.
    Collapse {
        /// Token target the collapse should bring the context down to.
        target_tokens: u32,
    },
    /// Layer 5: auto-compact — collapse history entirely except last K turns. Distinct from Collapse
    /// for the same reason: the planner needs to control ordering and metadata.
    AutoCompact {
        /// K: the number of most recent turns to keep.
        preserve_turns: usize,
    },
}
215
216impl EvictionOp {
217 pub fn label(&self) -> &'static str {
218 match self {
219 Self::Spool(_) => "spool",
220 Self::Snip { .. } => "snip",
221 Self::TimeDecayMicro => "time_decay_micro",
222 Self::Collapse { .. } => "collapse",
223 Self::AutoCompact { .. } => "auto_compact",
224 }
225 }
226
227 /// Cache-aware metadata: the message index at which this op invalidates the prompt cache
228 /// prefix, if any. `None` = prefix-safe (op only affects late content or is layer-1 spool).
229 /// Earlier index = higher cache cost (Anthropic cache keys off the first N messages).
230 pub fn invalidates_prefix_at(&self) -> Option<usize> {
231 match self {
232 // Spool: layer-1 disk spool of single large result; no message reordering → no impact.
233 Self::Spool(_) => None,
234 // Snip: in-place rewrite of oversized messages anywhere in history. May hit early
235 // messages if an early turn was oversized → conservative: assume prefix invalidation.
236 Self::Snip { .. } => Some(0), // Conservative: may affect any message including early ones.
237 // TimeDecayMicro: excerpts large tool results to placeholders. Tool results are always
238 // interleaved (after their call), so they're typically mid/late history. Assuming the
239 // system prompt + first few user messages are untouched → prefix-safe for most sessions.
240 Self::TimeDecayMicro => None,
241 // Collapse: drops oldest messages to reach target. By definition modifies early history
242 // → prefix invalidation at the drop point.
243 Self::Collapse { .. } => Some(0),
244 // AutoCompact: drops all but last K turns → even more aggressive prefix invalidation.
245 Self::AutoCompact { .. } => Some(0),
246 }
247 }
248}
249
/// An ordered set of eviction actions returned by the planner. Empty = no compression needed
/// ("don't compress unless you have to"). The order is the execution order.
#[derive(Debug, Clone, Default)]
pub struct EvictionPlan {
    /// Ops to run, first to last.
    pub ops: Vec<EvictionOp>,
}
256
257impl EvictionPlan {
258 pub fn empty() -> Self {
259 Self::default()
260 }
261
262 pub fn is_empty(&self) -> bool {
263 self.ops.is_empty()
264 }
265
266 /// Whether the plan includes the Layer-3 idle/time-decay micro op.
267 pub fn has_time_decay(&self) -> bool {
268 self.ops.iter().any(|op| matches!(op, EvictionOp::TimeDecayMicro))
269 }
270
271 /// Map legacy `PressureAction` → the new specific op (for behavior-preserving migration).
272 /// The old `recommend()` returns one of 5 actions; we map them 1:1 onto the new ops.
273 pub fn from_legacy_action(action: PressureAction, target_tokens: u32, preserve_turns: usize) -> Self {
274 let ops = match action {
275 PressureAction::None => vec![],
276 PressureAction::SnipCompact => vec![EvictionOp::Snip { per_msg_ratio: 0.10 }],
277 PressureAction::MicroCompact => vec![EvictionOp::TimeDecayMicro],
278 PressureAction::ContextCollapse => vec![EvictionOp::Collapse { target_tokens }],
279 PressureAction::AutoCompact => vec![EvictionOp::AutoCompact { preserve_turns }],
280 };
281 Self { ops }
282 }
283}
284
/// Layer-1 spool decision for a single tool result (kernel decides; SDK writes to disk).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SpoolDecision {
    /// Byte size of the full (un-spooled) output, clamped to `u32::MAX` for outputs too large to
    /// represent.
    pub original_size: u32,
    /// The preview text the kernel keeps in working context in place of the full output.
    pub preview: String,
}

/// Pure Layer-1 spool planner: if `output` exceeds `threshold_bytes` (and threshold > 0), return a
/// [`SpoolDecision`] whose `preview` is the first `preview_bytes` (truncated at a char boundary)
/// plus a marker. `None` means keep the output inline. The kernel keeps `preview` in context and
/// emits `LargeResultSpooled`; the SDK persists the full content to disk. No I/O here.
///
/// * `output` — the full tool result text.
/// * `threshold_bytes` — outputs of at most this many bytes stay inline; `0` disables spooling.
/// * `preview_bytes` — upper bound on the bytes of `output` copied into the preview. The cut is
///   moved back to the nearest char boundary; a value larger than the output keeps all of it.
///
/// The marker reports the exact byte length of `output`; only `SpoolDecision::original_size` is
/// clamped to fit `u32`.
pub fn plan_spool(output: &str, threshold_bytes: u32, preview_bytes: u32) -> Option<SpoolDecision> {
    let size = output.len();
    if threshold_bytes == 0 || size <= threshold_bytes as usize {
        return None;
    }

    // Walk back from the requested cut to the closest char boundary so the slice below can never
    // split a multi-byte character. Index 0 is always a boundary, so the search always succeeds.
    let limit = (preview_bytes as usize).min(size);
    let end = (0..=limit)
        .rev()
        .find(|&idx| output.is_char_boundary(idx))
        .unwrap_or(0);

    let preview = format!(
        "{}\n[…tool result spooled: {} bytes total, {} byte preview shown; full content persisted to disk by the SDK…]",
        &output[..end],
        size,
        end
    );

    // Clamp before narrowing: a bare `size as u32` would silently wrap for outputs >= 4 GiB.
    let original_size = size.min(u32::MAX as usize) as u32;
    Some(SpoolDecision { original_size, preview })
}
313
314/// Pure eviction planner (M3): the **single decision point** for the per-turn compression
315/// checkpoint. Packages the two previously-scattered decisions — Layer-3 idle/time-decay and the
316/// rho-driven pressure recommendation — into one ordered [`EvictionPlan`], in execution order
317/// (time-decay micro first, then the pressure action). Behavior-preserving: the inputs are exactly
318/// what the state machine already computed (`ContextManager::should_time_decay_compact` and
319/// `PressureMonitor::recommend`); this only centralizes their ordering and makes the plan testable.
320///
321/// Layer-1 spool is decided at tool-result ingestion (handle size), not here.
322///
323/// W1-1 收口: `target_tokens` / `preserve_turns` are the **real** config-derived values supplied by
324/// the caller (`ContextManager::plan_compaction_params`), so the emitted ops carry truthful params
325/// instead of the old magic-number placeholders. The plan is now the single decision point for *what*
326/// to compact and *to what target*; the executor honors `Collapse { target_tokens }` verbatim rather
327/// than re-deriving it. (The richer `(rho, idle_ms, &HandleTable, &cfg)` signature with explicit
328/// cache-cost ordering remains a future refinement; the `invalidates_prefix_at` metadata is already
329/// carried per op.)
330pub fn plan_eviction(
331 recommended: PressureAction,
332 idle_decay: bool,
333 target_tokens: u32,
334 preserve_turns: usize,
335) -> EvictionPlan {
336 let mut ops = Vec::new();
337 if idle_decay {
338 ops.push(EvictionOp::TimeDecayMicro);
339 }
340 // Map the pressure recommendation to a specific op; `None` yields an empty plan (no op appended).
341 if recommended != PressureAction::None {
342 ops.extend(EvictionPlan::from_legacy_action(recommended, target_tokens, preserve_turns).ops);
343 }
344 EvictionPlan { ops }
345}
346
// Unit tests for the handle table, the eviction planner and the Layer-1 spool planner.
#[cfg(test)]
mod tests {
    use super::*;

    /// Table with one resident (100), one spooled-out (5000) and one collapsed (200) handle.
    fn mixed_table() -> HandleTable {
        let mut table = HandleTable::new();
        table.insert(Handle::resident(1, HandleKind::ToolResult, 100));
        table.insert(Handle {
            id: 2,
            kind: HandleKind::SpoolFile,
            residency: Residency::SpooledOut { r: "disk://x".into() },
            tokens: 5000,
            source: None,
        });
        table.insert(Handle {
            id: 3,
            kind: HandleKind::MemoryPage,
            residency: Residency::Collapsed,
            tokens: 200,
            source: None,
        });
        table
    }

    #[test]
    fn resident_tokens_counts_only_resident() {
        assert_eq!(mixed_table().resident_tokens(), 100);
    }

    #[test]
    fn non_resident_tokens_counts_everything_but_resident() {
        assert_eq!(mixed_table().non_resident_tokens(), 5200);
    }

    #[test]
    fn handle_table_insert_is_idempotent_by_id() {
        let mut table = HandleTable::new();
        table.insert(Handle::resident(1, HandleKind::ToolResult, 100));
        table.insert(Handle::resident(1, HandleKind::ToolResult, 250));
        assert_eq!(table.all().len(), 1);
        assert_eq!(table.get(1).unwrap().tokens, 250);
    }

    #[test]
    fn residency_occupies_context_only_when_resident() {
        assert!(Residency::Resident.occupies_context());
        assert!(!Residency::Collapsed.occupies_context());
        assert!(!Residency::SpooledOut { r: "disk://x".into() }.occupies_context());
        assert!(!Residency::PagedOut { tier: MemoryTierHint::Semantic }.occupies_context());
    }

    #[test]
    fn plan_eviction_empty_when_no_pressure_and_no_idle() {
        assert!(plan_eviction(PressureAction::None, false, 50_000, 2).is_empty());
    }

    #[test]
    fn plan_eviction_emits_specific_op_for_recommended_action() {
        let plan = plan_eviction(PressureAction::AutoCompact, false, 50_000, 3);
        // The op carries the real preserve_turns the caller passed, not a placeholder.
        assert!(matches!(&plan.ops[..], [EvictionOp::AutoCompact { preserve_turns: 3 }]));
    }

    #[test]
    fn plan_eviction_snip_compact_emits_single_snip() {
        let plan = plan_eviction(PressureAction::SnipCompact, false, 50_000, 2);
        assert!(matches!(&plan.ops[..], [EvictionOp::Snip { .. }]));
    }

    #[test]
    fn plan_eviction_collapse_carries_caller_target_tokens() {
        // W1-1 close-out: the planner stamps the caller's real target into the Collapse op (no
        // placeholder), and the executor honors it verbatim.
        let plan = plan_eviction(PressureAction::ContextCollapse, false, 12_345, 2);
        assert!(matches!(&plan.ops[..], [EvictionOp::Collapse { target_tokens: 12_345 }]));
    }

    #[test]
    fn plan_eviction_orders_time_decay_before_pressure() {
        // Idle + rho both fire: time-decay micro runs first, then the specific op — matching
        // the legacy checkpoint order exactly.
        let plan = plan_eviction(PressureAction::ContextCollapse, true, 50_000, 2);
        assert_eq!(plan.ops.len(), 2);
        assert!(matches!(plan.ops[0], EvictionOp::TimeDecayMicro));
        assert!(matches!(plan.ops[1], EvictionOp::Collapse { .. }));
    }

    #[test]
    fn plan_eviction_time_decay_only() {
        let plan = plan_eviction(PressureAction::None, true, 50_000, 2);
        assert_eq!(plan.ops.len(), 1);
        assert!(matches!(plan.ops[0], EvictionOp::TimeDecayMicro));
    }

    #[test]
    fn plan_eviction_micro_compact_emits_time_decay_without_idle() {
        // Regression: a pressure-driven MicroCompact emits a TimeDecayMicro op *independent* of the
        // idle-decay flag. So `has_time_decay()` can be true while `idle_decay` is false — the state
        // machine's compaction checkpoint must assert the implication (`idle_decay ⇒ has_time_decay`),
        // NOT equality (the old `debug_assert_eq!(has_time_decay, idle_decay)` wrongly aborted here).
        let plan = plan_eviction(PressureAction::MicroCompact, false, 50_000, 2);
        assert!(plan.has_time_decay(), "MicroCompact yields a time-decay op even when not idle");
        // And the checkpoint invariant the fixed assertion encodes holds for every combination.
        // All five pressure actions are listed so "every" is literal.
        for recommended in [
            PressureAction::None,
            PressureAction::SnipCompact,
            PressureAction::MicroCompact,
            PressureAction::ContextCollapse,
            PressureAction::AutoCompact,
        ] {
            for idle in [false, true] {
                let p = plan_eviction(recommended, idle, 50_000, 2);
                assert!(!idle || p.has_time_decay(), "idle_decay must imply a time-decay op");
            }
        }
    }

    #[test]
    fn eviction_op_labels() {
        assert_eq!(EvictionOp::Spool(1).label(), "spool");
        assert_eq!(EvictionOp::Snip { per_msg_ratio: 0.1 }.label(), "snip");
        assert_eq!(EvictionOp::TimeDecayMicro.label(), "time_decay_micro");
        assert_eq!(EvictionOp::Collapse { target_tokens: 5000 }.label(), "collapse");
        assert_eq!(EvictionOp::AutoCompact { preserve_turns: 2 }.label(), "auto_compact");
    }

    #[test]
    fn plan_spool_keeps_small_output_inline() {
        assert_eq!(plan_spool("small", 50, 16), None);
        // threshold 0 disables spooling.
        assert_eq!(plan_spool(&"x".repeat(1000), 0, 16), None);
    }

    #[test]
    fn plan_spool_previews_large_output() {
        let output = "y".repeat(1000);
        let d = plan_spool(&output, 100, 32).expect("should spool");
        assert_eq!(d.original_size, 1000);
        assert!(d.preview.starts_with(&"y".repeat(32)));
        assert!(d.preview.contains("1000 bytes total"));
        assert!(d.preview.len() < output.len());
    }

    #[test]
    fn plan_spool_truncates_on_char_boundary() {
        // multi-byte chars: preview cut must not split a char.
        let output = "🚀".repeat(100); // 4 bytes each = 400 bytes
        let d = plan_spool(&output, 50, 10).expect("should spool");
        assert!(d.preview.contains("400 bytes total"));
        // A 10-byte budget falls inside the third rocket, so the cut backs off to 8 bytes:
        // exactly two whole characters survive, followed directly by the marker.
        assert!(d.preview.starts_with("🚀🚀\n["));
        assert!(d.preview.contains("8 byte preview shown"));
    }
}