Skip to main content

deepstrike_core/mm/
handle.rs

//! Primitive P3: the resource handle table + paging (context as address space).
//!
//! M0 scaffold (see `.local-docs/specs/agent-os-three-primitives.md`): types + a pure
//! eviction-plan stub only — **no wiring, no behavior change**. A later milestone (M3, which is the
//! compression optimization) builds a [`HandleTable`] over the context manager and replaces the
//! scattered compactors in [`crate::context::compression`] with a single pure [`plan_eviction`].
//!
//! Concept overlap this primitive collapses: the 5-layer compression pyramid (5 compactors each
//! deciding its own trigger) becomes one [`EvictionPlan`] of uniform [`EvictionOp`]s; page-out (④)
//! and long-term memory residency (⑦) ride on [`Residency`].
11
12use compact_str::CompactString;
13use serde::{Deserialize, Serialize};
14
15use crate::context::pressure::PressureAction;
16use crate::mm::MemoryTierHint;
17
/// Opaque identifier of a [`Handle`]. From M3 onward these are assigned as tool results,
/// knowledge entries, and memory pages enter context.
pub type HandleId = u32;
20
21/// What a handle refers to.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23#[serde(rename_all = "snake_case")]
24pub enum HandleKind {
25    /// A tool result occupying working context.
26    ToolResult,
27    /// A working-memory page (compressible / pageable).
28    MemoryPage,
29    /// A knowledge entry paged in from long-term storage.
30    KnowledgeEntry,
31    /// A large result spooled to disk with a preview left in context (Layer 1).
32    SpoolFile,
33    /// A sub-agent join result occupying context.
34    SubAgentJoin,
35}
36
37/// Where a handle's content currently lives. Page-in/page-out are transitions on this.
38#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub enum Residency {
41    /// Full content present in working context.
42    Resident,
43    /// Content written to disk; a preview reference remains (Layer 1 spool).
44    SpooledOut { r: String },
45    /// Content archived to long-term storage at the given tier (page-out).
46    PagedOut { tier: MemoryTierHint },
47    /// Original kept locally but projected out of the rendered view (Layer 4 read-time projection).
48    Collapsed,
49}
50
51impl Residency {
52    pub fn label(&self) -> &'static str {
53        match self {
54            Self::Resident => "resident",
55            Self::SpooledOut { .. } => "spooled_out",
56            Self::PagedOut { .. } => "paged_out",
57            Self::Collapsed => "collapsed",
58        }
59    }
60
61    /// Whether the handle's full content currently counts against the token budget.
62    pub fn occupies_context(&self) -> bool {
63        matches!(self, Self::Resident)
64    }
65}
66
67/// One addressable resource the agent holds.
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct Handle {
70    pub id: HandleId,
71    pub kind: HandleKind,
72    pub residency: Residency,
73    /// Token cost of the resident form (used by the eviction planner).
74    pub tokens: u32,
75    /// Link back to the source object in working context — for [`HandleKind::ToolResult`] this is
76    /// the tool `call_id`, letting the renderer project a handle's residency onto its message
77    /// (read-time projection) without mutating the stored message. `None` for handles with no
78    /// in-context anchor.
79    #[serde(default, skip_serializing_if = "Option::is_none")]
80    pub source: Option<CompactString>,
81}
82
83impl Handle {
84    pub fn resident(id: HandleId, kind: HandleKind, tokens: u32) -> Self {
85        Self { id, kind, residency: Residency::Resident, tokens, source: None }
86    }
87
88    /// A resident handle anchored to a source object (e.g. a tool `call_id`).
89    pub fn resident_for(
90        id: HandleId,
91        kind: HandleKind,
92        tokens: u32,
93        source: impl Into<CompactString>,
94    ) -> Self {
95        Self { id, kind, residency: Residency::Resident, tokens, source: Some(source.into()) }
96    }
97}
98
99/// Per-task handle table. M3 makes the context manager's partitions a view over this.
100#[derive(Debug, Clone, Default, Serialize, Deserialize)]
101pub struct HandleTable {
102    handles: Vec<Handle>,
103}
104
105impl HandleTable {
106    pub fn new() -> Self {
107        Self::default()
108    }
109
110    pub fn insert(&mut self, handle: Handle) {
111        if let Some(existing) = self.handles.iter_mut().find(|h| h.id == handle.id) {
112            *existing = handle;
113        } else {
114            self.handles.push(handle);
115        }
116    }
117
118    pub fn get(&self, id: HandleId) -> Option<&Handle> {
119        self.handles.iter().find(|h| h.id == id)
120    }
121
122    pub fn get_mut(&mut self, id: HandleId) -> Option<&mut Handle> {
123        self.handles.iter_mut().find(|h| h.id == id)
124    }
125
126    pub fn all(&self) -> &[Handle] {
127        &self.handles
128    }
129
130    pub fn all_mut(&mut self) -> &mut [Handle] {
131        &mut self.handles
132    }
133
134    /// Retain only the handles for which `keep` returns true; drop the rest. The GC primitive the
135    /// context manager uses to evict handles whose backing message has left working context
136    /// (archived by compression / dropped on renewal) — bounding the table to the working set
137    /// instead of growing with total session length.
138    pub fn retain(&mut self, keep: impl FnMut(&Handle) -> bool) {
139        self.handles.retain(keep);
140    }
141
142    /// Residency of the handle anchored to `source` (e.g. a tool `call_id`), if any.
143    /// The renderer uses this to project a tool result without touching the stored message.
144    pub fn residency_for_source(&self, source: &str) -> Option<&Residency> {
145        self.handles
146            .iter()
147            .find(|h| h.source.as_deref() == Some(source))
148            .map(|h| &h.residency)
149    }
150
151    /// Tool-result handles in insertion (recency) order — oldest first. Used by the residency
152    /// planner to decide which older results to project out under context pressure.
153    pub fn tool_result_handles_mut(&mut self) -> impl Iterator<Item = &mut Handle> {
154        self.handles
155            .iter_mut()
156            .filter(|h| matches!(h.kind, HandleKind::ToolResult))
157    }
158
159    /// Sum of tokens for handles still occupying working context.
160    pub fn resident_tokens(&self) -> u32 {
161        self.handles
162            .iter()
163            .filter(|h| h.residency.occupies_context())
164            .map(|h| h.tokens)
165            .sum()
166    }
167
168    /// Sum of tokens for handles that have left working context (`Collapsed` / `SpooledOut` /
169    /// `PagedOut`). Their anchored messages still sit in `partitions` at full weight (collapse is
170    /// non-destructive), so this is exactly the over-count that the *estimate* rho path must
171    /// discount to become paging-aware — see [`crate::context::manager::ContextManager::effective_rho`].
172    pub fn non_resident_tokens(&self) -> u32 {
173        self.handles
174            .iter()
175            .filter(|h| !h.residency.occupies_context())
176            .map(|h| h.tokens)
177            .sum()
178    }
179}
180
181/// One ordered eviction action in an [`EvictionPlan`]. Maps the pressure pyramid onto explicit
182/// ops the planner emits directly (the old `Pressure(PressureAction)` umbrella is deleted), each
183/// annotated with cache-aware metadata via [`EvictionOp::invalidates_prefix_at`].
184///
185/// P1-6 (async LLM semantic summary) is **not** a distinct op here: every archiving op already
186/// emits the drained messages as `archived` on the `Compressed` observation, and the SDK upgrades
187/// that summary out-of-band (LLM call = SDK I/O, a kernel non-goal), writing back a second
188/// `compressed` event. A separate in-kernel `Summarize` op would be a never-produced dead variant.
189///
190/// **Layer boundary vs [`crate::context::pressure::PressureAction`] (do not collapse the two):**
191/// `EvictionOp` is the *planner-op* vocabulary — what `plan_eviction` decides to do, carrying the
192/// per-op payload (`target_tokens` / `per_msg_ratio` / `preserve_turns`). `PressureAction` is the
193/// *pressure-level* vocabulary owned by the pressure subsystem: it is what `PressureMonitor::recommend`
194/// and `ContextManager::should_compress` return, the `Ord`-keyed cascade selector inside the
195/// compression pipeline, and the canonical wire label. They map ~1:1 by layer but are not redundant —
196/// `Spool` / `TimeDecayMicro` don't sit on the linear pressure cascade, and `PressureAction` carries no
197/// per-op data. The one bridge is `execute_eviction_op`, which is the intended seam, not duplication.
198#[derive(Debug, Clone)]
199pub enum EvictionOp {
200    /// Layer 1: spool a large handle to disk, keep a preview reference in context.
201    Spool(HandleId),
202    /// Layer 2: cap oversized messages at a per-message token limit (in-place rewrite).
203    Snip { per_msg_ratio: f64 },
204    /// Layer 3: idle/time-decay micro-compact — excerpt large tool results to placeholders.
205    /// Independent of rho; stamps `last_compact_ms` and uses the non-time compress path.
206    TimeDecayMicro,
207    /// Layer 4: collapse (read-time projection) — drop oldest messages until within target.
208    /// Now a distinct op (no longer bundled under `Pressure`), so the planner can annotate it
209    /// with cache-aware metadata and order it explicitly.
210    Collapse { target_tokens: u32 },
211    /// Layer 5: auto-compact — collapse history entirely except last K turns. Distinct from Collapse
212    /// for the same reason: the planner needs to control ordering and metadata.
213    AutoCompact { preserve_turns: usize },
214}
215
216impl EvictionOp {
217    pub fn label(&self) -> &'static str {
218        match self {
219            Self::Spool(_) => "spool",
220            Self::Snip { .. } => "snip",
221            Self::TimeDecayMicro => "time_decay_micro",
222            Self::Collapse { .. } => "collapse",
223            Self::AutoCompact { .. } => "auto_compact",
224        }
225    }
226
227    /// Cache-aware metadata: the message index at which this op invalidates the prompt cache
228    /// prefix, if any. `None` = prefix-safe (op only affects late content or is layer-1 spool).
229    /// Earlier index = higher cache cost (Anthropic cache keys off the first N messages).
230    pub fn invalidates_prefix_at(&self) -> Option<usize> {
231        match self {
232            // Spool: layer-1 disk spool of single large result; no message reordering → no impact.
233            Self::Spool(_) => None,
234            // Snip: in-place rewrite of oversized messages anywhere in history. May hit early
235            // messages if an early turn was oversized → conservative: assume prefix invalidation.
236            Self::Snip { .. } => Some(0), // Conservative: may affect any message including early ones.
237            // TimeDecayMicro: excerpts large tool results to placeholders. Tool results are always
238            // interleaved (after their call), so they're typically mid/late history. Assuming the
239            // system prompt + first few user messages are untouched → prefix-safe for most sessions.
240            Self::TimeDecayMicro => None,
241            // Collapse: drops oldest messages to reach target. By definition modifies early history
242            // → prefix invalidation at the drop point.
243            Self::Collapse { .. } => Some(0),
244            // AutoCompact: drops all but last K turns → even more aggressive prefix invalidation.
245            Self::AutoCompact { .. } => Some(0),
246        }
247    }
248}
249
250/// An ordered set of eviction actions returned by the planner. Empty = no compression needed
251/// ("能不压就不压"). The order is the execution order.
252#[derive(Debug, Clone, Default)]
253pub struct EvictionPlan {
254    pub ops: Vec<EvictionOp>,
255}
256
257impl EvictionPlan {
258    pub fn empty() -> Self {
259        Self::default()
260    }
261
262    pub fn is_empty(&self) -> bool {
263        self.ops.is_empty()
264    }
265
266    /// Whether the plan includes the Layer-3 idle/time-decay micro op.
267    pub fn has_time_decay(&self) -> bool {
268        self.ops.iter().any(|op| matches!(op, EvictionOp::TimeDecayMicro))
269    }
270
271    /// Map legacy `PressureAction` → the new specific op (for behavior-preserving migration).
272    /// The old `recommend()` returns one of 5 actions; we map them 1:1 onto the new ops.
273    pub fn from_legacy_action(action: PressureAction, target_tokens: u32, preserve_turns: usize) -> Self {
274        let ops = match action {
275            PressureAction::None => vec![],
276            PressureAction::SnipCompact => vec![EvictionOp::Snip { per_msg_ratio: 0.10 }],
277            PressureAction::MicroCompact => vec![EvictionOp::TimeDecayMicro],
278            PressureAction::ContextCollapse => vec![EvictionOp::Collapse { target_tokens }],
279            PressureAction::AutoCompact => vec![EvictionOp::AutoCompact { preserve_turns }],
280        };
281        Self { ops }
282    }
283}
284
/// Layer-1 spool decision for a single tool result (kernel decides; SDK writes to disk).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SpoolDecision {
    /// Byte size of the full (un-spooled) output, saturated at `u32::MAX` for larger outputs.
    pub original_size: u32,
    /// The preview text the kernel keeps in working context in place of the full output.
    pub preview: String,
}

/// Pure Layer-1 spool planner. No I/O happens here.
///
/// Returns `None` (keep the output inline) when `threshold_bytes` is 0 — which disables
/// spooling — or when `output` is at most `threshold_bytes` long. Otherwise returns a
/// [`SpoolDecision`] whose `preview` is the first `preview_bytes` bytes of `output`, shortened
/// to the nearest preceding char boundary so a multi-byte character is never split, followed by
/// a marker line stating the total size and the preview size.
///
/// The kernel keeps `preview` in context and emits `LargeResultSpooled`; the SDK persists the
/// full content to disk.
pub fn plan_spool(output: &str, threshold_bytes: u32, preview_bytes: u32) -> Option<SpoolDecision> {
    let size = output.len();
    if threshold_bytes == 0 || size <= threshold_bytes as usize {
        return None;
    }

    // Largest char boundary that is <= the requested preview length. Index 0 is always a
    // boundary, so the search cannot come up empty.
    let limit = (preview_bytes as usize).min(size);
    let end = (0..=limit)
        .rev()
        .find(|&idx| output.is_char_boundary(idx))
        .unwrap_or(0);

    let preview = format!(
        "{}\n[…tool result spooled: {} bytes total, {} byte preview shown; full content persisted to disk by the SDK…]",
        &output[..end], size, end
    );

    // A bare `size as u32` would silently wrap for outputs larger than `u32::MAX` bytes and
    // report a tiny size; clamp first so the cast is lossless.
    let original_size = size.min(u32::MAX as usize) as u32;
    Some(SpoolDecision { original_size, preview })
}
313
314/// Pure eviction planner (M3): the **single decision point** for the per-turn compression
315/// checkpoint. Packages the two previously-scattered decisions — Layer-3 idle/time-decay and the
316/// rho-driven pressure recommendation — into one ordered [`EvictionPlan`], in execution order
317/// (time-decay micro first, then the pressure action). Behavior-preserving: the inputs are exactly
318/// what the state machine already computed (`ContextManager::should_time_decay_compact` and
319/// `PressureMonitor::recommend`); this only centralizes their ordering and makes the plan testable.
320///
321/// Layer-1 spool is decided at tool-result ingestion (handle size), not here.
322///
323/// W1-1 收口: `target_tokens` / `preserve_turns` are the **real** config-derived values supplied by
324/// the caller (`ContextManager::plan_compaction_params`), so the emitted ops carry truthful params
325/// instead of the old magic-number placeholders. The plan is now the single decision point for *what*
326/// to compact and *to what target*; the executor honors `Collapse { target_tokens }` verbatim rather
327/// than re-deriving it. (The richer `(rho, idle_ms, &HandleTable, &cfg)` signature with explicit
328/// cache-cost ordering remains a future refinement; the `invalidates_prefix_at` metadata is already
329/// carried per op.)
330pub fn plan_eviction(
331    recommended: PressureAction,
332    idle_decay: bool,
333    target_tokens: u32,
334    preserve_turns: usize,
335) -> EvictionPlan {
336    let mut ops = Vec::new();
337    if idle_decay {
338        ops.push(EvictionOp::TimeDecayMicro);
339    }
340    // Map the pressure recommendation to a specific op; `None` yields an empty plan (no op appended).
341    if recommended != PressureAction::None {
342        ops.extend(EvictionPlan::from_legacy_action(recommended, target_tokens, preserve_turns).ops);
343    }
344    EvictionPlan { ops }
345}
346
/// Unit tests for the handle table, the eviction planner, and the Layer-1 spool planner.
#[cfg(test)]
mod tests {
    use super::*;

    /// Table with one resident (100 tokens), one spooled-out (5000) and one collapsed (200) handle.
    fn mixed_residency_table() -> HandleTable {
        let mut table = HandleTable::new();
        table.insert(Handle::resident(1, HandleKind::ToolResult, 100));
        table.insert(Handle {
            id: 2,
            kind: HandleKind::SpoolFile,
            residency: Residency::SpooledOut { r: "disk://x".into() },
            tokens: 5000,
            source: None,
        });
        table.insert(Handle {
            id: 3,
            kind: HandleKind::MemoryPage,
            residency: Residency::Collapsed,
            tokens: 200,
            source: None,
        });
        table
    }

    #[test]
    fn resident_tokens_counts_only_resident() {
        assert_eq!(mixed_residency_table().resident_tokens(), 100);
    }

    #[test]
    fn non_resident_tokens_counts_everything_else() {
        // Spooled-out (5000) + collapsed (200); the resident handle is excluded.
        assert_eq!(mixed_residency_table().non_resident_tokens(), 5200);
        assert_eq!(HandleTable::new().non_resident_tokens(), 0);
    }

    #[test]
    fn handle_table_insert_is_idempotent_by_id() {
        let mut table = HandleTable::new();
        table.insert(Handle::resident(1, HandleKind::ToolResult, 100));
        table.insert(Handle::resident(1, HandleKind::ToolResult, 250));
        assert_eq!(table.all().len(), 1);
        assert_eq!(table.get(1).unwrap().tokens, 250);
    }

    #[test]
    fn residency_for_source_follows_anchor_and_retain() {
        let mut table = HandleTable::new();
        table.insert(Handle::resident_for(1, HandleKind::ToolResult, 10, "call_a"));
        table.insert(Handle::resident(2, HandleKind::MemoryPage, 20));
        assert_eq!(table.residency_for_source("call_a"), Some(&Residency::Resident));
        assert_eq!(table.residency_for_source("missing"), None);

        // Dropping the anchored handle removes its projection as well.
        table.retain(|h| h.id != 1);
        assert_eq!(table.all().len(), 1);
        assert_eq!(table.residency_for_source("call_a"), None);
    }

    #[test]
    fn residency_occupies_context_only_when_resident() {
        assert!(Residency::Resident.occupies_context());
        assert!(!Residency::Collapsed.occupies_context());
        assert!(!Residency::PagedOut { tier: MemoryTierHint::Semantic }.occupies_context());
    }

    #[test]
    fn plan_eviction_empty_when_no_pressure_and_no_idle() {
        assert!(plan_eviction(PressureAction::None, false, 50_000, 2).is_empty());
    }

    #[test]
    fn plan_eviction_emits_specific_op_for_recommended_action() {
        let plan = plan_eviction(PressureAction::AutoCompact, false, 50_000, 3);
        // The op carries the real preserve_turns the caller passed, not a placeholder.
        assert!(matches!(&plan.ops[..], [EvictionOp::AutoCompact { preserve_turns: 3 }]));
    }

    #[test]
    fn plan_eviction_collapse_carries_caller_target_tokens() {
        // W1-1 close-out: the planner stamps the caller's real target into the Collapse op (no
        // placeholder), and the executor honors it verbatim.
        let plan = plan_eviction(PressureAction::ContextCollapse, false, 12_345, 2);
        assert!(matches!(&plan.ops[..], [EvictionOp::Collapse { target_tokens: 12_345 }]));
    }

    #[test]
    fn plan_eviction_snip_compact_emits_single_snip() {
        // SnipCompact maps to exactly one Snip op and never smuggles in a time-decay op.
        let plan = plan_eviction(PressureAction::SnipCompact, false, 50_000, 2);
        assert!(matches!(&plan.ops[..], [EvictionOp::Snip { .. }]));
        assert!(!plan.has_time_decay());
    }

    #[test]
    fn plan_eviction_orders_time_decay_before_pressure() {
        // Idle + rho both fire: time-decay micro runs first, then the specific op — matching
        // the legacy checkpoint order exactly.
        let plan = plan_eviction(PressureAction::ContextCollapse, true, 50_000, 2);
        assert_eq!(plan.ops.len(), 2);
        assert!(matches!(plan.ops[0], EvictionOp::TimeDecayMicro));
        assert!(matches!(plan.ops[1], EvictionOp::Collapse { .. }));
    }

    #[test]
    fn plan_eviction_time_decay_only() {
        let plan = plan_eviction(PressureAction::None, true, 50_000, 2);
        assert_eq!(plan.ops.len(), 1);
        assert!(matches!(plan.ops[0], EvictionOp::TimeDecayMicro));
    }

    #[test]
    fn plan_eviction_micro_compact_emits_time_decay_without_idle() {
        // Regression: a pressure-driven MicroCompact emits a TimeDecayMicro op *independent* of the
        // idle-decay flag. So `has_time_decay()` can be true while `idle_decay` is false — the state
        // machine's compaction checkpoint must assert the implication (`idle_decay ⇒ has_time_decay`),
        // NOT equality (the old `debug_assert_eq!(has_time_decay, idle_decay)` wrongly aborted here).
        let plan = plan_eviction(PressureAction::MicroCompact, false, 50_000, 2);
        assert!(plan.has_time_decay(), "MicroCompact yields a time-decay op even when not idle");
        // And the checkpoint invariant the fixed assertion encodes holds for every combination
        // (all five pressure actions, idle or not):
        for recommended in [
            PressureAction::None,
            PressureAction::SnipCompact,
            PressureAction::MicroCompact,
            PressureAction::AutoCompact,
            PressureAction::ContextCollapse,
        ] {
            for idle in [false, true] {
                let p = plan_eviction(recommended, idle, 50_000, 2);
                assert!(!idle || p.has_time_decay(), "idle_decay must imply a time-decay op");
            }
        }
    }

    #[test]
    fn eviction_op_labels() {
        assert_eq!(EvictionOp::Spool(1).label(), "spool");
        assert_eq!(EvictionOp::Snip { per_msg_ratio: 0.1 }.label(), "snip");
        assert_eq!(EvictionOp::TimeDecayMicro.label(), "time_decay_micro");
        assert_eq!(EvictionOp::Collapse { target_tokens: 5000 }.label(), "collapse");
        assert_eq!(EvictionOp::AutoCompact { preserve_turns: 2 }.label(), "auto_compact");
    }

    #[test]
    fn plan_spool_keeps_small_output_inline() {
        assert_eq!(plan_spool("small", 50, 16), None);
        // threshold 0 disables spooling.
        assert_eq!(plan_spool(&"x".repeat(1000), 0, 16), None);
    }

    #[test]
    fn plan_spool_previews_large_output() {
        let output = "y".repeat(1000);
        let d = plan_spool(&output, 100, 32).expect("should spool");
        assert_eq!(d.original_size, 1000);
        assert!(d.preview.starts_with(&"y".repeat(32)));
        assert!(d.preview.contains("1000 bytes total"));
        assert!(d.preview.len() < output.len());
    }

    #[test]
    fn plan_spool_truncates_on_char_boundary() {
        // multi-byte chars: preview cut must not split a char.
        let output = "🚀".repeat(100); // 4 bytes each = 400 bytes
        let d = plan_spool(&output, 50, 10).expect("should spool");
        // No panic / valid UTF-8 preview is the assertion.
        assert!(d.preview.contains("400 bytes total"));
    }
}