Skip to main content

deepstrike_core/context/
manager.rs

1use super::compression::CompressionPipeline;
2use super::config::ContextConfig;
3use super::partitions::ContextPartitions;
4use super::pressure::{PressureAction, PressureMonitor};
5use super::renderer::RenderedContext;
6use super::renewal::{HandoffArtifact, RenewalPolicy};
7use super::sections::{ContextSectionPartition, ContextSectionRegistry};
8use super::snapshot::{ContextSnapshotHint, ContextSnapshot};
9use super::skill_catalog::SkillCatalog;
10use super::task_state::{TaskState, TaskUpdate};
11use super::token_engine::ContextTokenEngine;
12use crate::mm::handle::{Handle, HandleId, HandleKind, HandleTable, Residency};
13use crate::types::capability::{CapabilityKind, CapabilityManifest};
14use crate::types::message::{Content, ContentPart, Message, ToolSchema};
15use crate::types::skill::SkillMetadata;
16use compact_str::CompactString;
17
/// Name of the memory meta-tool: used both as the capability-manifest key in
/// `ContextManager::set_memory_enabled` and as the schema name in
/// `ContextManager::memory_tool_schema`.
pub const MEMORY_TOOL_NAME: &str = "memory";
/// Name of the knowledge meta-tool: used both as the capability-manifest key in
/// `ContextManager::set_knowledge_enabled` and as the schema name in
/// `ContextManager::knowledge_tool_schema`.
pub const KNOWLEDGE_TOOL_NAME: &str = "knowledge";
20
/// Internal context engine backing [`crate::runtime::KernelRuntime`].
///
/// Owns the context partitions together with the components that operate on them: the token
/// engine, the pressure monitor, the compression pipeline, the renewal policy, the skill catalog,
/// the capability manifest and the per-task handle table.
///
/// Exposed for in-crate use and tests; external callers should drive the kernel
/// through `KernelRuntime` rather than this type directly.
#[doc(hidden)]
pub struct ContextManager {
    /// The context partitions this manager operates on (`system`, `knowledge`, `history`,
    /// `signals` and `task_state` are the ones accessed from this file).
    pub partitions: ContextPartitions,
    /// Token budget. Denominator of [`Self::effective_rho`] and passed through to the pressure
    /// monitor, compression pipeline, renewal policy and renderer.
    pub max_tokens: u32,
    /// Tuning knobs (collapse threshold, recent-message preservation, idle minutes, target
    /// tokens, ...) read throughout the manager.
    pub config: ContextConfig,
    /// Token counter used for partition totals and for sizing tool-result handles.
    pub engine: ContextTokenEngine,
    /// Renewal counter: starts at 0 and is incremented by one on every [`Self::renew`].
    pub sprint: u32,
    /// Handoff artifact produced by the most recent [`Self::renew`]; `None` before the first one.
    pub last_handoff: Option<HandoffArtifact>,
    /// Catalog of available skills; consulted for allowed tools and for the skill tool schema.
    pub skills: SkillCatalog,
    /// P1-B tool gating: the set of skills the model has loaded this session (by name). Their
    /// declared `allowed_tools` are unioned to narrow the exposed toolset in `emit_call_llm`.
    /// A set (not a single value) because the model may load several skills and still needs each
    /// one's tools (D1). v1 accumulates (no eviction). Snapshotted for wake/resume.
    pub active_skills: std::collections::BTreeSet<CompactString>,
    /// P1-B/D stable-core: tool ids that stay exposed even when a skill narrows the toolset (the
    /// "everyone uses these" set — read/search/bash etc.). Configured once by the SDK; empty by
    /// default (hard rule: no config ⇒ skills narrow to exactly their declared tools + meta-tools).
    pub stable_core_tools: std::collections::HashSet<CompactString>,
    /// Capability manifest; kept in sync by `set_available_skills` and the meta-tool setters.
    pub capabilities: CapabilityManifest,
    /// Section registry. Tracks pinning; a pinned History partition disables compaction.
    pub sections: ContextSectionRegistry,
    /// Whether the memory meta-tool is exposed (see `set_memory_enabled`).
    pub memory_enabled: bool,
    /// Whether the knowledge meta-tool is exposed (see `set_knowledge_enabled`).
    pub knowledge_enabled: bool,
    /// Whether the `update_plan` meta-tool is exposed (see `set_plan_tool_enabled`).
    pub plan_tool_enabled: bool,
    /// Prompt-token count last reported via `set_observed_prompt_tokens`; handed to the pressure
    /// monitor by `rho`, and when set makes `effective_rho` defer to `rho`.
    last_observed_prompt_tokens: Option<u32>,
    /// Compression pipeline, built from `config` at construction time.
    compression: CompressionPipeline,
    /// Pressure monitor, built from `max_tokens` and a clone of `config` at construction time.
    pressure: PressureMonitor,
    /// Renewal policy, built from `config` at construction time.
    renewal: RenewalPolicy,

    // ── Layer 3: Time tracking for decay ─────────────────────────────────

    /// Last activity timestamp (milliseconds since epoch).
    /// Updated on each ProviderResult and ToolResults.
    pub last_activity_ms: u64,

    /// Last compression timestamp (milliseconds since epoch).
    /// Updated on each compression pass that is given a timestamp.
    pub last_compact_ms: Option<u64>,

    // ── P3: handle table (context as address space) ─────────────────────────

    /// Per-task handle table: one [`Handle`] per addressable working-context object (tool results
    /// today). Residency transitions on these handles drive read-time projection (Layer 4) and
    /// spool (Layer 1) — the original messages in `partitions` are never mutated by projection.
    pub handles: HandleTable,
    /// Monotonic allocator for [`HandleId`]s.
    next_handle_id: HandleId,

    /// P1-E: history length (message count) as of the last compaction/renewal. Messages below this
    /// index are the **frozen prefix** — byte-stable until the next compaction — so the renderer can
    /// hand providers a `frozen_prefix_len` for a long-lived deep cache breakpoint. 0 before any
    /// compaction (no frozen region yet). Not snapshotted: on resume it resets to 0 and rebuilds at
    /// the next compaction (graceful — only the deep-cache durability lapses, never correctness).
    frozen_history_len: usize,
}
79
80impl ContextManager {
81    pub fn new(max_tokens: u32) -> Self {
82        Self::with_config(max_tokens, ContextConfig::default(), ContextTokenEngine::char_approx())
83    }
84
85    pub fn with_config(max_tokens: u32, config: ContextConfig, engine: ContextTokenEngine) -> Self {
86        let compression = CompressionPipeline::new(&config);
87        let pressure = PressureMonitor::new(max_tokens, config.clone());
88        let renewal = RenewalPolicy::from_config(&config);
89        let partitions = ContextPartitions::new(&config);
90        Self {
91            partitions, max_tokens, config, engine,
92            sprint: 0, last_handoff: None,
93            skills: SkillCatalog::new(),
94            active_skills: std::collections::BTreeSet::new(),
95            stable_core_tools: std::collections::HashSet::new(),
96            capabilities: CapabilityManifest::new(),
97            sections: ContextSectionRegistry::default_agent_sections(),
98            memory_enabled: false, knowledge_enabled: false, plan_tool_enabled: false,
99            last_observed_prompt_tokens: None,
100            compression, pressure, renewal,
101            last_activity_ms: 0,
102            last_compact_ms: None,
103            handles: HandleTable::new(),
104            next_handle_id: 0,
105            frozen_history_len: 0,
106        }
107    }
108
109    // ── Layer 3: Time-based decay ─────────────────────────────────────────────
110
111    /// Update activity timestamp (call on each ProviderResult and ToolResults).
112    pub fn record_activity(&mut self, now_ms: u64) {
113        self.last_activity_ms = now_ms;
114    }
115
116    /// Check if Micro-Compact should trigger based on time decay (Layer 3).
117    /// Returns true if idle time exceeds `micro_compact_idle_minutes`.
118    pub fn should_time_decay_compact(&self, now_ms: u64) -> bool {
119        let idle_ms = if let Some(last_compact) = self.last_compact_ms {
120            // Time since last compression
121            now_ms.saturating_sub(last_compact)
122        } else {
123            // Time since first activity
124            now_ms.saturating_sub(self.last_activity_ms)
125        };
126
127        let idle_minutes = idle_ms / 60_000;
128        idle_minutes >= self.config.micro_compact_idle_minutes as u64
129    }
130
131    // ── Layer 4: read-time projection (handle residency) ────────────────────
132
133    /// Recompute tool-result handle residency for Layer-4 read-time projection (call before
134    /// `render`). When pressure (`rho`) reaches `collapse_threshold`, all but the most recent
135    /// `preserve_recent_msgs` tool results are marked `Collapsed` (rendered as previews).
136    ///
137    /// **Monotonic within a cache generation (P0-C):** collapse is one-way here —
138    /// `Resident → Collapsed` only, never the reverse. The old two-way version un-collapsed when
139    /// `rho` fell back below the threshold, which (a) rewrote mid-history bytes and invalidated the
140    /// prompt-cache prefix on every threshold oscillation, and (b) re-billed a full tool-result body
141    /// for near-zero attention gain (an old result that already faded). Un-collapsing now happens
142    /// only at compaction/renewal boundaries via [`Self::reset_collapse_generation`] — the one moment
143    /// the prefix is rewritten anyway, so the cache cost is already paid. Non-destructive:
144    /// `partitions` is untouched. Spooled/paged-out handles are left as-is.
145    pub fn recompute_handle_residency(&mut self) {
146        // Monotonic: below the threshold we never *un*-collapse, so there is nothing to do.
147        if self.rho() < self.config.collapse_threshold {
148            return;
149        }
150        let keep = self.config.preserve_recent_msgs;
151        // Single mutable pass in insertion order. `tool_result_handles_mut().enumerate()` yields the
152        // collapse candidates oldest-first; `i < cutoff` protects the most recent `keep` results.
153        let total = self
154            .handles
155            .all()
156            .iter()
157            .filter(|h| matches!(h.kind, HandleKind::ToolResult))
158            .count();
159        let cutoff = total.saturating_sub(keep);
160        for (i, handle) in self.handles.tool_result_handles_mut().enumerate() {
161            // Only fold the reversible Resident → Collapsed axis; never clobber a handle that has
162            // been spooled or paged out, and never reverse an existing collapse mid-generation.
163            if i < cutoff && matches!(handle.residency, Residency::Resident) {
164                handle.residency = Residency::Collapsed;
165            }
166        }
167    }
168
169    /// Start a fresh collapse generation: un-collapse every `Collapsed` handle back to `Resident`.
170    /// Called only at compaction/renewal boundaries — the sole points where un-collapsing is
171    /// cache-free, since the rendered prefix is rewritten there regardless. Between boundaries
172    /// [`Self::recompute_handle_residency`] keeps collapse strictly one-way (P0-C). Spooled/paged-out
173    /// handles are untouched (they leave the Resident↔Collapsed cycle deliberately).
174    pub fn reset_collapse_generation(&mut self) {
175        for handle in self.handles.all_mut() {
176            if matches!(handle.residency, Residency::Collapsed) {
177                handle.residency = Residency::Resident;
178            }
179        }
180    }
181
182    /// Drop handles whose anchored source message no longer lives in `partitions.history` — i.e.
183    /// archived by a compaction or dropped on renewal. Without this the handle table grows with
184    /// total session length (a handle per tool result, never removed), which also inflates the
185    /// per-turn `recompute_handle_residency` scan. Called at compaction/renewal boundaries, so the
186    /// table tracks the working set, not the whole session. Handles with no `source` anchor (future
187    /// non-tool-result kinds) are always kept — they can't be orphaned by this check.
188    pub fn prune_orphaned_handles(&mut self) {
189        let live: std::collections::HashSet<CompactString> = self
190            .partitions
191            .history
192            .messages
193            .iter()
194            .flat_map(|m| match &m.content {
195                Content::Parts(parts) => parts
196                    .iter()
197                    .filter_map(|p| match p {
198                        ContentPart::ToolResult { call_id, .. } => Some(call_id.clone()),
199                        _ => None,
200                    })
201                    .collect::<Vec<_>>(),
202                _ => Vec::new(),
203            })
204            .collect();
205        self.handles
206            .retain(|h| h.source.as_ref().is_none_or(|s| live.contains(s)));
207    }
208
209    /// Mark the handle anchored to `call_id` as spooled to disk (Layer 1): the SDK persists the
210    /// full output, working context keeps only the preview. Keeps the handle out of the
211    /// Resident↔Collapsed projection cycle. No-op if no handle is anchored to `call_id`.
212    pub fn mark_spooled(&mut self, call_id: &str, spool_ref: impl Into<String>) {
213        let spool_ref = spool_ref.into();
214        if let Some(handle) = self
215            .handles
216            .all_mut()
217            .iter_mut()
218            .find(|h| h.source.as_deref() == Some(call_id))
219        {
220            handle.residency = Residency::SpooledOut { r: spool_ref };
221        }
222    }
223
224    // ── Pressure ──────────────────────────────────────────────────────────────
225
226    /// **Raw** rho — full partition weight (or provider-observed tokens when available). This is the
227    /// projection-decision rho: [`Self::recompute_handle_residency`] marks the Resident↔Collapsed set
228    /// from *this* value, so it must NOT discount paged content (else collapse → rho drops →
229    /// un-collapse would oscillate). Compaction/renewal triggers use [`Self::effective_rho`] instead.
230    pub fn rho(&self) -> f64 {
231        self.pressure
232            .pressure(&self.partitions, &self.engine, self.last_observed_prompt_tokens)
233    }
234
235    /// **Effective** rho — the pressure that actually drives compaction/renewal, made paging-aware.
236    ///
237    /// When provider usage is authoritative (`observed_prompt_tokens` set), the rendered prompt was
238    /// already collapsed (the renderer emits previews for `Collapsed` handles), so the observed count
239    /// already reflects paging — raw rho is exact and returned as-is. In the **estimate** path
240    /// (no observed tokens) we estimate from `partitions`, which still carry the full weight of
241    /// paged-out tool results (collapse is non-destructive); we subtract the non-resident handle
242    /// tokens so that collapsing/spooling a result immediately relieves pressure, rather than only
243    /// after the next provider round-trip. With no paged handles this equals [`Self::rho`], so the
244    /// pre-paging behavior is preserved exactly.
245    pub fn effective_rho(&self) -> f64 {
246        if self.max_tokens == 0 || self.last_observed_prompt_tokens.is_some() {
247            return self.rho();
248        }
249        let total = self.partitions.total_tokens(&self.engine);
250        let effective = total.saturating_sub(self.handles.non_resident_tokens());
251        effective as f64 / self.max_tokens as f64
252    }
253
254    pub fn set_observed_prompt_tokens(&mut self, tokens: u32) {
255        self.last_observed_prompt_tokens = Some(tokens);
256    }
257
258    pub fn should_compress(&self) -> PressureAction {
259        // Compaction-tier recommendation runs on **raw** rho. The paging-aware `effective_rho` was
260        // wired here during W1-1 but it over-relieved pressure: once `micro_compact` paged out
261        // tool-result handles, effective rho fell below the collapse/auto_compact thresholds, so the
262        // heavy tiers never fired — violating W1-1's own DoD ("既有压缩 golden 不变" /
263        // "AutoCompact 后 wake 注入语义摘要"). Until the full cache-aware planner lands (the planner
264        // that scores prefix-invalidation per op, `effective_rho` reserved for it), the tier trigger
265        // must use raw rho so escalation is preserved. `effective_rho` stays defined + tested for
266        // that work; it is intentionally not consulted by the trigger today.
267        self.pressure.recommend(self.rho())
268    }
269
270    pub fn compress(&mut self, action: PressureAction) -> (u32, Option<String>, Vec<Message>, Option<usize>) {
271        self.compress_with_time(action, None)
272    }
273
274    pub fn compress_with_time(
275        &mut self,
276        action: PressureAction,
277        now_ms: Option<u64>,
278    ) -> (u32, Option<String>, Vec<Message>, Option<usize>) {
279        if self.sections.is_partition_pinned(ContextSectionPartition::History) {
280            return (0, None, vec![], None);
281        }
282
283        let result = {
284            let target = self.config.target_tokens(self.max_tokens);
285            self.compression.compress(&mut self.partitions, action, self.max_tokens, target, &self.engine)
286        };
287
288        // Record compression timestamp if provided
289        if let Some(ts) = now_ms {
290            self.last_compact_ms = Some(ts);
291        }
292
293        // Archived messages have left history — drop their now-orphaned handles (bounds the table).
294        if !result.2.is_empty() {
295            self.prune_orphaned_handles();
296            // Compaction rewrote the history prefix — start a fresh collapse generation so
297            // surviving handles re-evaluate from Resident (P0-C: the one cache-free un-collapse point).
298            self.reset_collapse_generation();
299        }
300        // P2-D × P1-E: re-anchor the frozen-prefix boundary only when the compaction actually broke
301        // the prompt-cache prefix (`result.3` = the planner's per-step `cache_at` cost, `Some` ⇒ a
302        // prefix break). A prefix-safe compaction (late Snip/Excerpt that touches no early message)
303        // leaves `[0..frozen]` byte-stable, so the deep cache survives the compaction and the boundary
304        // holds — strictly more precise than the old `archived`-keyed reset, which missed an early
305        // in-place Snip and needlessly re-anchored after a prefix-safe pass.
306        if result.3.is_some() {
307            self.frozen_history_len = self.partitions.history.messages.len();
308        }
309
310        result
311    }
312
313    pub fn force_compress(&mut self) -> (u32, Option<String>, Vec<Message>, Option<usize>) {
314        if self.sections.is_partition_pinned(ContextSectionPartition::History) {
315            return (0, None, vec![], None);
316        }
317        let result = self.compression.compress(&mut self.partitions, PressureAction::AutoCompact, self.max_tokens, 0, &self.engine);
318        if !result.2.is_empty() {
319            self.prune_orphaned_handles();
320            // Compaction rewrote the history prefix — start a fresh collapse generation so
321            // surviving handles re-evaluate from Resident (P0-C: the one cache-free un-collapse point).
322            self.reset_collapse_generation();
323        }
324        // P2-D × P1-E: re-anchor the frozen-prefix boundary only when the compaction actually broke
325        // the prompt-cache prefix (`result.3` = the planner's per-step `cache_at` cost, `Some` ⇒ a
326        // prefix break). A prefix-safe compaction (late Snip/Excerpt that touches no early message)
327        // leaves `[0..frozen]` byte-stable, so the deep cache survives the compaction and the boundary
328        // holds — strictly more precise than the old `archived`-keyed reset, which missed an early
329        // in-place Snip and needlessly re-anchored after a prefix-safe pass.
330        if result.3.is_some() {
331            self.frozen_history_len = self.partitions.history.messages.len();
332        }
333        result
334    }
335
336    /// W1-1 收口: run one compaction `action` toward an **explicit** `target_tokens`, instead of
337    /// re-deriving the target from config. This is what lets `EvictionOp::Collapse { target_tokens }`
338    /// flow from the planner (the single decision point) straight to the executor — the compactor no
339    /// longer re-decides the target. `compress_with_time` remains the config-derived convenience used
340    /// by the other layers (Snip/Micro), whose target equals `config.target_tokens(max_tokens)`.
341    pub fn compress_with_target(
342        &mut self,
343        action: PressureAction,
344        target_tokens: u32,
345        now_ms: Option<u64>,
346    ) -> (u32, Option<String>, Vec<Message>, Option<usize>) {
347        if self.sections.is_partition_pinned(ContextSectionPartition::History) {
348            return (0, None, vec![], None);
349        }
350        let result =
351            self.compression
352                .compress(&mut self.partitions, action, self.max_tokens, target_tokens, &self.engine);
353        if let Some(ts) = now_ms {
354            self.last_compact_ms = Some(ts);
355        }
356        if !result.2.is_empty() {
357            self.prune_orphaned_handles();
358            // Compaction rewrote the history prefix — start a fresh collapse generation so
359            // surviving handles re-evaluate from Resident (P0-C: the one cache-free un-collapse point).
360            self.reset_collapse_generation();
361        }
362        // P2-D × P1-E: re-anchor the frozen-prefix boundary only when the compaction actually broke
363        // the prompt-cache prefix (`result.3` = the planner's per-step `cache_at` cost, `Some` ⇒ a
364        // prefix break). A prefix-safe compaction (late Snip/Excerpt that touches no early message)
365        // leaves `[0..frozen]` byte-stable, so the deep cache survives the compaction and the boundary
366        // holds — strictly more precise than the old `archived`-keyed reset, which missed an early
367        // in-place Snip and needlessly re-anchored after a prefix-safe pass.
368        if result.3.is_some() {
369            self.frozen_history_len = self.partitions.history.messages.len();
370        }
371        result
372    }
373
374    /// W1-1 收口: the truthful compaction parameters the planner stamps into the [`EvictionPlan`],
375    /// read once from config so the ops carry real values (not magic-number placeholders) and the
376    /// executor stays a pure executor. Returns `(target_tokens, preserve_recent_turns)`.
377    pub fn plan_compaction_params(&self) -> (u32, usize) {
378        (
379            self.config.target_tokens(self.max_tokens),
380            self.config.preserve_recent_turns,
381        )
382    }
383
384    // ── Renewal ───────────────────────────────────────────────────────────────
385
386    pub fn should_renew(&self) -> bool {
387        self.renewal.should_renew(&self.pressure, &self.partitions, &self.engine)
388    }
389
390    pub fn renew(&mut self) {
391        let goal = self.partitions.task_state.goal.clone();
392        let (renewed, artifact) = self.renewal.renew(&self.partitions, &goal, self.sprint, self.max_tokens);
393        self.partitions = renewed;
394        self.last_handoff = Some(artifact);
395        self.sprint += 1;
396        // History was rebuilt wholesale — drop handles anchored to messages it no longer carries,
397        // and start a fresh collapse generation (P0-C) since the whole prefix changed.
398        self.prune_orphaned_handles();
399        self.reset_collapse_generation();
400        // P1-E: the renewed history is the new frozen base.
401        self.frozen_history_len = self.partitions.history.messages.len();
402    }
403
404    // ── Render ────────────────────────────────────────────────────────────────
405
406    pub fn render(&self) -> RenderedContext {
407        super::renderer::render_projected(
408            &self.partitions,
409            self.max_tokens,
410            &self.engine,
411            self.config.preserve_recent_msgs,
412            &self.handles,
413            self.frozen_history_len,
414        )
415    }
416
417    pub fn snapshot_hint(&self) -> ContextSnapshotHint {
418        ContextSnapshotHint::from_parts(&self.sections, &self.capabilities)
419    }
420
421    pub fn take_snapshot(&self, turn: u32) -> ContextSnapshot {
422        ContextSnapshot {
423            turn,
424            system_messages: self.partitions.system.messages.clone(),
425            knowledge_messages: self.partitions.knowledge.messages.clone(),
426            history_messages: self.partitions.history.messages.clone(),
427            task_state: self.partitions.task_state.clone(),
428        }
429    }
430
431    // ── History / Knowledge ───────────────────────────────────────────────────
432
433    pub fn push_history(&mut self, msg: Message, tokens: u32) {
434        // P3 (3a): index each tool result entering working context as a handle, anchored to its
435        // call_id. Pure bookkeeping — render/compression still read `partitions` until 3b. The
436        // handle's residency later drives read-time projection without mutating the message.
437        if let Content::Parts(parts) = &msg.content {
438            for part in parts {
439                if let ContentPart::ToolResult { call_id, output, .. } = part {
440                    let id = self.alloc_handle_id();
441                    let tok = self.engine.count(output).max(1);
442                    self.handles.insert(Handle::resident_for(
443                        id,
444                        HandleKind::ToolResult,
445                        tok,
446                        call_id.clone(),
447                    ));
448                }
449            }
450        }
451        self.partitions.history.push(msg, tokens);
452    }
453
454    fn alloc_handle_id(&mut self) -> HandleId {
455        let id = self.next_handle_id;
456        self.next_handle_id = self.next_handle_id.wrapping_add(1);
457        id
458    }
459
460    /// Push content into the Knowledge slot (memory retrievals, skill defs, artifacts).
461    pub fn push_knowledge(&mut self, msg: Message, tokens: u32) {
462        self.partitions.knowledge.push(msg, tokens);
463    }
464
465    /// Push a runtime signal into the current turn's State slot.
466    /// Signals are ephemeral — cleared after each render.
467    pub fn push_signal(&mut self, text: String) {
468        self.partitions.signals.push(text);
469    }
470
471    /// Record a durable user directive in the (non-compressible, renewal-carried) task_state, so a
472    /// mid-task user command keeps its salience across compaction/renewal — unlike the ephemeral
473    /// signal channel, which is cleared on renewal.
474    pub fn record_directive(&mut self, text: impl Into<String>) {
475        self.partitions.task_state.record_directive(text);
476    }
477
478    // ── Task state ────────────────────────────────────────────────────────────
479
480    pub fn init_task(&mut self, goal: String, criteria: Vec<String>) {
481        self.partitions.task_state = TaskState { goal, criteria, ..Default::default() };
482    }
483
484    pub fn update_task(&mut self, update: TaskUpdate) {
485        self.partitions.task_state.apply(update);
486    }
487
488    // ── Section pinning ───────────────────────────────────────────────────────
489
490    pub fn pin_section(&mut self, id: &str) -> bool { self.sections.pin(id) }
491    pub fn unpin_section(&mut self, id: &str) -> bool { self.sections.unpin(id) }
492
493    // ── Skills ────────────────────────────────────────────────────────────────
494
495    pub fn set_available_skills(&mut self, skills: Vec<SkillMetadata>) {
496        self.capabilities.remove_kind(CapabilityKind::Skill);
497        for skill in &skills { self.capabilities.add_skill(skill.clone()); }
498        self.skills.set_available(skills);
499    }
500
501    /// P1-B/D: set the stable-core tool ids (always exposed under skill gating). Replaces any prior.
502    pub fn set_stable_core_tools(&mut self, ids: impl IntoIterator<Item = CompactString>) {
503        self.stable_core_tools = ids.into_iter().collect();
504    }
505
506    /// P1-B: record that the model has loaded a skill (its content is now in context). Returns
507    /// `true` if this changed the active set — an epoch boundary the SDK can use to re-anchor the
508    /// prompt cache (D). No-op (returns false) when the skill was already active.
509    pub fn activate_skill(&mut self, name: impl Into<CompactString>) -> bool {
510        self.active_skills.insert(name.into())
511    }
512
513    /// P1-B: the tool-id allow-set to narrow the exposed toolset to, given the active skills.
514    /// Returns `None` ⇒ **do not narrow** (no skill active, or some active skill declares no
515    /// `allowed_tools` ⇒ unbounded, errs-open per D3). `Some(set)` ⇒ narrow to `set` (the union of
516    /// every active skill's declared tools). Meta-tools and stable-core are layered on in
517    /// `emit_call_llm`, not here.
518    pub fn active_skill_tool_filter(&self) -> Option<std::collections::HashSet<CompactString>> {
519        if self.active_skills.is_empty() {
520            return None;
521        }
522        let mut union = std::collections::HashSet::new();
523        for name in &self.active_skills {
524            let declared = self.skills.allowed_tools(name);
525            if declared.is_empty() {
526                return None; // an unrestricted active skill ⇒ no narrowing (D3)
527            }
528            union.extend(declared.iter().cloned());
529        }
530        Some(union)
531    }
532
533    pub fn skill_tool_schema(&self) -> Option<ToolSchema> {
534        self.skills.build_tool_schema()
535    }
536
537    // ── Meta-tools ────────────────────────────────────────────────────────────
538
539    pub fn set_memory_enabled(&mut self, enabled: bool) {
540        self.memory_enabled = enabled;
541        if enabled {
542            self.capabilities.add_marker(CapabilityKind::Memory, MEMORY_TOOL_NAME,
543                "Search long-term memory through the memory meta-tool.");
544        } else {
545            self.capabilities.remove(CapabilityKind::Memory, MEMORY_TOOL_NAME);
546        }
547    }
548
549    pub fn set_knowledge_enabled(&mut self, enabled: bool) {
550        self.knowledge_enabled = enabled;
551        if enabled {
552            self.capabilities.add_marker(CapabilityKind::Knowledge, KNOWLEDGE_TOOL_NAME,
553                "Search external knowledge through the knowledge meta-tool.");
554        } else {
555            self.capabilities.remove(CapabilityKind::Knowledge, KNOWLEDGE_TOOL_NAME);
556        }
557    }
558
559    pub fn set_plan_tool_enabled(&mut self, enabled: bool) {
560        self.plan_tool_enabled = enabled;
561        if enabled {
562            self.capabilities.add_marker(CapabilityKind::Tool, "update_plan",
563                "Update task plan and progress through the planning meta-tool.");
564        } else {
565            self.capabilities.remove(CapabilityKind::Tool, "update_plan");
566        }
567    }
568
569    pub fn capability_inventory(&self) -> String { self.capabilities.format_inventory() }
570
571    pub fn meta_tool_schemas(&self) -> Vec<ToolSchema> {
572        let mut tools = Vec::new();
573        if let Some(t) = self.skill_tool_schema() { tools.push(t); }
574        if let Some(t) = self.memory_tool_schema() { tools.push(t); }
575        if let Some(t) = self.knowledge_tool_schema() { tools.push(t); }
576        if let Some(t) = self.plan_tool_schema() { tools.push(t); }
577        tools.sort_by(|a, b| a.name.cmp(&b.name));
578        tools
579    }
580
581    pub fn plan_tool_schema(&self) -> Option<ToolSchema> {
582        if !self.plan_tool_enabled { return None; }
583        Some(ToolSchema {
584            name: CompactString::new("update_plan"),
585            description: "Update your task plan and progress. Call this after completing a step or when the plan changes.".to_string(),
586            parameters: serde_json::json!({
587                "type": "object",
588                "properties": {
589                    "plan": { "type": "array", "items": { "type": "string" } },
590                    "current_step": { "type": "integer" },
591                    "progress": { "type": "string" },
592                    "blocked_on": { "type": "array", "items": { "type": "string" } }
593                }
594            }),
595        })
596    }
597
598    pub fn memory_tool_schema(&self) -> Option<ToolSchema> {
599        if !self.memory_enabled { return None; }
600        Some(ToolSchema {
601            name: CompactString::new(MEMORY_TOOL_NAME),
602            description: "Search your long-term memory for relevant past experiences and knowledge.".to_string(),
603            parameters: serde_json::json!({
604                "type": "object",
605                "properties": {
606                    "query": { "type": "string" },
607                    "top_k": { "type": "integer" }
608                },
609                "required": ["query"]
610            }),
611        })
612    }
613
614    pub fn knowledge_tool_schema(&self) -> Option<ToolSchema> {
615        if !self.knowledge_enabled { return None; }
616        Some(ToolSchema {
617            name: CompactString::new(KNOWLEDGE_TOOL_NAME),
618            description: "Search the external knowledge base for facts, documentation, or reference data.".to_string(),
619            parameters: serde_json::json!({
620                "type": "object",
621                "properties": {
622                    "query": { "type": "string" },
623                    "top_k": { "type": "integer" }
624                },
625                "required": ["query"]
626            }),
627        })
628    }
629}
630
#[cfg(test)]
mod tests {
    //! Unit tests for [`ContextManager`]: task state, compression and section pinning,
    //! meta-tool schemas, skill tool gating, and handle residency / handle-table pruning.

    use super::*;
    use crate::context::task_state::PlanStep;
    use crate::types::message::Message;
    use crate::types::skill::SkillMetadata;

    /// Builds a tool message carrying a single non-error `ToolResult` part.
    fn tool_result_msg(call_id: &str, output: &str) -> Message {
        Message::tool(vec![ContentPart::ToolResult {
            call_id: call_id.into(),
            output: output.to_string(),
            is_error: false,
        }])
    }

    /// Collects the `call_id` of every `ToolResult` part currently in the history partition.
    fn live_tool_call_ids(mgr: &ContextManager) -> Vec<&str> {
        let mut ids = Vec::new();
        for m in mgr.partitions.history.messages.iter() {
            if let Content::Parts(parts) = &m.content {
                for part in parts.iter() {
                    if let ContentPart::ToolResult { call_id, .. } = part {
                        let id: &str = call_id.as_ref();
                        ids.push(id);
                    }
                }
            }
        }
        ids
    }

    /// After `renew`, the handoff artifact carries the task-state goal and `sprint` is 1.
    #[test]
    fn manager_renew_uses_task_state_goal() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("test goal".to_string(), vec![]);
        mgr.partitions.system.push(Message::system("rules"), 10);
        for i in 0..10 { mgr.push_history(Message::user(format!("msg {i}")), 50); }
        mgr.renew();
        let artifact = mgr.last_handoff.as_ref().unwrap();
        assert_eq!(artifact.goal, "test goal");
        assert_eq!(mgr.sprint, 1);
    }

    /// AutoCompact shrinks the history partition and leaves the knowledge token count unchanged.
    #[test]
    fn compress_only_touches_history() {
        let mut mgr = ContextManager::new(1_000);
        mgr.push_knowledge(Message::system("knowledge content"), 100);
        for _ in 0..30 { mgr.push_history(Message::user("history msg"), 50); }
        let knowledge_before = mgr.partitions.knowledge.token_count;
        let history_before = mgr.partitions.history.token_count;
        mgr.compress(PressureAction::AutoCompact);
        assert_eq!(mgr.partitions.knowledge.token_count, knowledge_before);
        assert!(mgr.partitions.history.token_count < history_before);
    }

    /// `init_task` stores both the goal and the success criteria in the task state.
    #[test]
    fn init_task_sets_goal_and_criteria() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("analyse data".to_string(), vec!["criterion A".to_string()]);
        assert_eq!(mgr.partitions.task_state.goal, "analyse data");
        assert_eq!(mgr.partitions.task_state.criteria, ["criterion A"]);
    }

    /// `update_task` applies the supplied plan and current step to the task state.
    #[test]
    fn update_task_applies_plan() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("g".to_string(), vec![]);
        mgr.update_task(TaskUpdate {
            plan: Some(vec!["step 1".to_string(), "step 2".to_string()]),
            current_step: Some(0),
            ..Default::default()
        });
        assert_eq!(mgr.partitions.task_state.plan.len(), 2);
        assert_eq!(mgr.partitions.task_state.current_step, Some(0));
    }

    /// The goal and plan are still present after an AutoCompact pass.
    #[test]
    fn task_state_survives_autocompact() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("survive compression".to_string(), vec![]);
        mgr.update_task(TaskUpdate {
            plan: Some(vec!["fetch data".to_string(), "analyse".to_string()]),
            ..Default::default()
        });
        for _ in 0..10 { mgr.push_history(Message::user("filler"), 50); }
        mgr.compress(PressureAction::AutoCompact);
        assert_eq!(mgr.partitions.task_state.goal, "survive compression");
        assert_eq!(mgr.partitions.task_state.plan.len(), 2);
    }

    /// The rendered task state appears in the state turn, never in `system_text`.
    #[test]
    fn render_includes_task_state_in_state_turn_not_system() {
        let mut mgr = ContextManager::new(10_000);
        mgr.init_task("find anomalies".to_string(), vec![]);
        let rc = mgr.render();
        assert!(!rc.system_text.contains("[TASK STATE]"), "task_state must not be in system_text");
        // State turn is separated from the cacheable history (turns).
        let state = rc.state_turn.as_ref().expect("should have a state turn");
        assert!(state.content.as_text().unwrap().contains("[TASK STATE] goal: find anomalies"));
    }

    /// Only plan steps that are not done show up as open tasks in the handoff artifact.
    #[test]
    fn renewal_open_tasks_from_task_state() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("g".to_string(), vec![]);
        mgr.partitions.task_state.plan = vec![
            PlanStep { label: "done".to_string(), done: true },
            PlanStep { label: "pending".to_string(), done: false },
        ];
        mgr.renew();
        let artifact = mgr.last_handoff.as_ref().unwrap();
        assert_eq!(artifact.open_tasks, vec!["pending"]);
    }

    /// With `history.rolling` pinned, AutoCompact saves nothing and history is untouched.
    #[test]
    fn pinned_history_section_skips_compression() {
        let mut mgr = ContextManager::new(1_000);
        for _ in 0..30 { mgr.push_history(Message::user("filler message for pinning test"), 50); }
        let tokens_before = mgr.partitions.history.token_count;
        mgr.pin_section("history.rolling");
        let (saved, _, _, _) = mgr.compress(PressureAction::AutoCompact);
        assert_eq!(saved, 0);
        assert_eq!(mgr.partitions.history.token_count, tokens_before);
    }

    /// Unpinning `history.rolling` makes the history compressible again.
    #[test]
    fn unpinned_history_section_allows_compression() {
        let mut mgr = ContextManager::new(1_000);
        for _ in 0..30 { mgr.push_history(Message::user("filler"), 50); }
        mgr.pin_section("history.rolling");
        mgr.unpin_section("history.rolling");
        let (saved, _, _, _) = mgr.compress(PressureAction::AutoCompact);
        assert!(saved > 0);
    }

    /// `force_compress` also saves nothing while `history.rolling` is pinned.
    #[test]
    fn force_compress_also_skips_when_history_pinned() {
        let mut mgr = ContextManager::new(1_000);
        for _ in 0..10 { mgr.push_history(Message::user("filler"), 50); }
        mgr.pin_section("history.rolling");
        let (saved, _, _, _) = mgr.force_compress();
        assert_eq!(saved, 0);
    }

    // ── W1-1 completion-state regression gates (Step 0). RED until the planner/pure-executor rewrite. ──

    #[test]
    fn auto_compact_entry_logs_auto_compact_action() {
        // C regression gate: `force_compress` is the auto-compact entry point; the summary the
        // provider eventually sees (rendered from `compression_log`) must carry the **auto_compact**
        // label. The broken W1 cascade ran `compress(AutoCompact, target=0)`, so `CollapseCompactor`
        // drained the whole history first and logged `context_collapse`, then `AutoCompactor` had
        // nothing to archive — the event was labeled `auto_compact` but the log/render showed
        // `context_collapse`. The pure-executor model logs with the op's own label, restoring the
        // op-label == log-label contract end users observe (node K04/K09).
        let mut mgr = ContextManager::new(1_000);
        for i in 0..40 {
            mgr.push_history(Message::user(format!("turn {i}: {}", "ctx ".repeat(40))), 200);
        }
        let (saved, summary, _, _) = mgr.force_compress();
        assert!(saved > 0, "force_compress should compact a large history");
        assert!(summary.is_some(), "auto-compact summarizes the archived turns");
        let actions: Vec<&str> = mgr
            .partitions
            .task_state
            .compression_log
            .iter()
            .map(|e| e.action.as_str())
            .collect();
        // `assert_eq!` prints both sides on failure in addition to the custom message.
        assert_eq!(
            actions.last(),
            Some(&"auto_compact"),
            "auto-compact entry must log an auto_compact action; got {actions:?}"
        );
    }

    /// Without registered skills there is no skill tool schema.
    #[test]
    fn skill_tool_schema_empty_when_no_skills() {
        let mgr = ContextManager::new(10_000);
        assert!(mgr.skill_tool_schema().is_none());
    }

    /// A registered skill's name appears in the skill tool description.
    #[test]
    fn skill_tool_schema_present_when_registered() {
        let mut mgr = ContextManager::new(10_000);
        mgr.set_available_skills(vec![SkillMetadata::new("debug", "Debug helper")]);
        assert!(mgr.skill_tool_schema().unwrap().description.contains("debug"));
    }

    /// The capability inventory lists both the name and the description of available skills.
    #[test]
    fn available_skills_are_reflected_in_capability_manifest() {
        let mut mgr = ContextManager::new(1_000);
        mgr.set_available_skills(vec![SkillMetadata::new("debug", "Debug helper")]);
        let inventory = mgr.capability_inventory();
        assert!(inventory.contains("debug"));
        assert!(inventory.contains("Debug helper"));
    }

    /// Toggling the memory tool adds it to and removes it from the capability inventory.
    #[test]
    fn toggled_meta_tools_are_reflected_in_capability_manifest() {
        let mut mgr = ContextManager::new(1_000);
        mgr.set_memory_enabled(true);
        assert!(mgr.capability_inventory().contains(MEMORY_TOOL_NAME));
        mgr.set_memory_enabled(false);
        assert!(!mgr.capability_inventory().contains(MEMORY_TOOL_NAME));
    }

    /// `meta_tool_schemas` returns the enabled meta tools ordered by name.
    #[test]
    fn meta_tool_schemas_are_sorted() {
        let mut mgr = ContextManager::new(1_000);
        mgr.set_available_skills(vec![SkillMetadata::new("debug", "Debug helper")]);
        mgr.set_memory_enabled(true);
        mgr.set_knowledge_enabled(true);
        let names = mgr.meta_tool_schemas().into_iter().map(|s| s.name.to_string()).collect::<Vec<_>>();
        assert_eq!(names, ["knowledge", "memory", "skill"]);
    }

    /// A fresh manager exposes the `capabilities.inventory` section through its registry.
    #[test]
    fn section_registry_is_available_on_manager() {
        let mgr = ContextManager::new(1_000);
        assert!(mgr.sections.get("capabilities.inventory").is_some());
    }

    #[test]
    fn b1_active_skill_state_and_tool_filter() {
        let mut mgr = ContextManager::new(1_000);
        let mut debug = SkillMetadata::new("debug", "Debug helper");
        debug.allowed_tools = vec![CompactString::new("read"), CompactString::new("grep")];
        let mut review = SkillMetadata::new("review", "Reviewer");
        review.allowed_tools = vec![CompactString::new("git_diff")];
        let plain = SkillMetadata::new("plain", "No tools declared"); // empty allowed_tools
        mgr.set_available_skills(vec![debug, review, plain]);

        // No active skill ⇒ no narrowing.
        assert!(mgr.active_skill_tool_filter().is_none());

        // Activating returns the epoch-boundary changed flag.
        assert!(mgr.activate_skill("debug"));
        assert!(!mgr.activate_skill("debug")); // already active ⇒ no change

        // One restricted skill ⇒ narrow to its tools.
        let f = mgr.active_skill_tool_filter().unwrap();
        assert_eq!(f.len(), 2);
        assert!(f.contains(&CompactString::new("read")) && f.contains(&CompactString::new("grep")));

        // Second restricted skill ⇒ union (D1).
        mgr.activate_skill("review");
        let f = mgr.active_skill_tool_filter().unwrap();
        assert_eq!(f.len(), 3);
        assert!(f.contains(&CompactString::new("git_diff")));

        // An active skill with NO declared tools ⇒ unbounded ⇒ do not narrow (D3, errs-open).
        mgr.activate_skill("plain");
        assert!(mgr.active_skill_tool_filter().is_none());
    }

    /// Enabling the memory tool changes the capability-manifest hash in the snapshot hint.
    #[test]
    fn snapshot_hint_changes_when_capabilities_change() {
        let mut mgr = ContextManager::new(1_000);
        let before = mgr.snapshot_hint();
        mgr.set_memory_enabled(true);
        let after = mgr.snapshot_hint();
        assert_ne!(before.capability_manifest_hash, after.capability_manifest_hash);
    }

    #[test]
    fn update_collapse_mode_collapses_old_tool_results_under_pressure() {
        let mut mgr = ContextManager::new(1_000);
        for i in 0..10 {
            mgr.push_history(tool_result_msg(&format!("c{i}"), &"x".repeat(40)), 40);
        }
        // Drive rho past collapse_threshold deterministically via observed prompt tokens.
        mgr.set_observed_prompt_tokens(950); // 950 / 1000 = 0.95 >= 0.90
        assert!(mgr.rho() >= mgr.config.collapse_threshold);

        mgr.recompute_handle_residency();
        // Oldest is collapsed; the most recent (within preserve_recent_msgs) stays resident.
        assert_eq!(mgr.handles.residency_for_source("c0"), Some(&Residency::Collapsed));
        assert_eq!(mgr.handles.residency_for_source("c9"), Some(&Residency::Resident));

        // P0-C — monotonic within a generation: once collapsed, dropping pressure does NOT
        // un-collapse (un-collapsing would re-bill the body and churn the cache prefix).
        mgr.set_observed_prompt_tokens(100); // 0.10 < 0.90
        mgr.recompute_handle_residency();
        assert_eq!(
            mgr.handles.residency_for_source("c0"),
            Some(&Residency::Collapsed),
            "collapse is sticky until a compaction boundary"
        );

        // Only a generation reset (compaction/renewal) un-collapses.
        mgr.reset_collapse_generation();
        assert_eq!(mgr.handles.residency_for_source("c0"), Some(&Residency::Resident));
    }

    #[test]
    fn frozen_prefix_len_anchors_at_compaction_and_holds_across_appends() {
        let mut mgr = ContextManager::new(1_000);
        // Pre-compaction: no frozen region yet → providers use the rolling-pair fallback.
        for i in 0..30 {
            mgr.push_history(Message::user(format!("turn {i}: {}", "ctx ".repeat(30))), 150);
        }
        assert!(mgr.render().frozen_prefix_len.is_none(), "no frozen region before any compaction");

        let (saved, _, archived, _) = mgr.compress(PressureAction::AutoCompact);
        assert!(saved > 0 && !archived.is_empty(), "expected archival");

        // Immediately after compaction the hot tail is empty → deep would coincide with the tail → None.
        assert!(mgr.render().frozen_prefix_len.is_none(), "deep == tail right after compaction");

        // As turns are appended, the deep boundary holds fixed while the tail grows.
        mgr.push_history(Message::user("new 1"), 5);
        let f1 = mgr.render().frozen_prefix_len.expect("frozen region exists once the tail grows");
        mgr.push_history(Message::assistant("reply 1"), 5);
        mgr.push_history(Message::user("new 2"), 5);
        let rc = mgr.render();
        let f2 = rc.frozen_prefix_len.expect("frozen region holds");
        assert_eq!(f1, f2, "the deep boundary is fixed between compactions; only the tail grows");
        assert!(f2 < rc.turns.len(), "deep boundary is distinct from the rolling tail");
    }

    #[test]
    fn frozen_boundary_holds_through_a_prefix_safe_compaction() {
        // P2-D × P1-E: the boundary re-anchors on a prefix-breaking compaction (cache_at = Some) but
        // is preserved through a prefix-safe one (cache_at = None) — the deep cache survives.
        let mut mgr = ContextManager::new(10_000);
        for i in 0..5 {
            mgr.push_history(Message::user(format!("m{i}")), 5);
        }
        mgr.frozen_history_len = 3; // pretend a prior compaction anchored the deep cache here

        // A no-op / prefix-safe compaction (PressureAction::None ⇒ cache_at None) must NOT move the
        // anchor — the cached [0..3] prefix is untouched, so the deep breakpoint stays put.
        let (_, _, _, cache_at) = mgr.compress(PressureAction::None);
        assert!(cache_at.is_none(), "no-op compaction is prefix-safe");
        assert_eq!(mgr.frozen_history_len, 3, "prefix-safe compaction preserves the deep-cache anchor");
    }

    #[test]
    fn collapse_generation_resets_on_autocompact() {
        let mut mgr = ContextManager::new(1_000);
        // Many oversized tool results: some will be archived by AutoCompact, the survivors
        // should come back Resident (fresh generation), not stay stuck Collapsed.
        for i in 0..20 {
            mgr.push_history(tool_result_msg(&format!("c{i}"), &"x".repeat(120)), 60);
        }
        mgr.set_observed_prompt_tokens(980); // force collapse of the older results
        mgr.recompute_handle_residency();
        assert_eq!(mgr.handles.residency_for_source("c0"), Some(&Residency::Collapsed));

        let (saved, _, archived, _) = mgr.compress(PressureAction::AutoCompact);
        assert!(saved > 0 && !archived.is_empty(), "expected archival");

        // Every surviving tool-result handle is Resident again — the compaction boundary
        // rewrote the prefix, so the next pressure cycle re-decides from scratch.
        for h in mgr.handles.all() {
            if matches!(h.kind, HandleKind::ToolResult) {
                assert_eq!(h.residency, Residency::Resident, "generation reset un-collapses survivors");
            }
        }
    }

    #[test]
    fn mark_spooled_sets_residency_and_survives_residency_recompute() {
        let mut mgr = ContextManager::new(1_000);
        mgr.push_history(tool_result_msg("big", "preview only"), 10);
        mgr.mark_spooled("big", "disk://big");
        assert_eq!(
            mgr.handles.residency_for_source("big"),
            Some(&Residency::SpooledOut { r: "disk://big".to_string() })
        );

        // Even under collapse pressure, a spooled handle is not pulled into the
        // Resident<->Collapsed projection cycle.
        mgr.set_observed_prompt_tokens(990);
        mgr.recompute_handle_residency();
        assert_eq!(
            mgr.handles.residency_for_source("big"),
            Some(&Residency::SpooledOut { r: "disk://big".to_string() })
        );
    }

    #[test]
    fn push_history_indexes_tool_results_as_resident_handles() {
        let mut mgr = ContextManager::new(10_000);
        mgr.push_history(tool_result_msg("call_1", "the tool output"), 20);
        // A handle was indexed, anchored to the call_id, resident by default.
        assert_eq!(mgr.handles.all().len(), 1);
        assert_eq!(
            mgr.handles.residency_for_source("call_1"),
            Some(&Residency::Resident)
        );
        // A plain text turn allocates no handle.
        mgr.push_history(Message::user("hello"), 5);
        assert_eq!(mgr.handles.all().len(), 1);
    }

    // ── W1-3: handle-table GC (prune orphaned handles + bounded recompute) ──

    #[test]
    fn effective_rho_discounts_paged_out_handles() {
        let mut mgr = ContextManager::new(1_000);
        // A large tool-result output so its handle carries a real token weight.
        let big = "data ".repeat(200);
        let tok = mgr.engine.count(&big);
        mgr.push_history(tool_result_msg("c0", &big), tok);
        mgr.push_history(Message::user("u"), 50);

        let raw = mgr.rho();
        // Everything resident → effective equals raw (behavior-preserving when nothing is paged).
        assert_eq!(mgr.handles.non_resident_tokens(), 0);
        assert!((mgr.effective_rho() - raw).abs() < f64::EPSILON);

        // Page the tool result out of working context.
        mgr.mark_spooled("c0", "disk://c0");
        let paged = mgr.handles.non_resident_tokens();
        assert!(paged > 0, "handle is now non-resident with a real token weight");

        // Raw rho is unchanged (partitions are untouched by the non-destructive projection)...
        assert!((mgr.rho() - raw).abs() < f64::EPSILON, "raw rho unchanged by paging");
        // ...but effective rho drops by exactly the paged tokens — paging relieves pressure now.
        let total = mgr.partitions.total_tokens(&mgr.engine);
        let expected = total.saturating_sub(paged) as f64 / 1_000.0;
        assert!((mgr.effective_rho() - expected).abs() < f64::EPSILON);
        assert!(mgr.effective_rho() < raw, "effective pressure relieved by paging");

        // When provider usage is authoritative, the rendered prompt was already collapsed, so
        // effective falls back to raw (no double-discount).
        mgr.set_observed_prompt_tokens(900);
        assert!((mgr.effective_rho() - mgr.rho()).abs() < f64::EPSILON);
    }

    #[test]
    fn prune_orphaned_handles_drops_handles_whose_message_left_history() {
        let mut mgr = ContextManager::new(10_000);
        mgr.push_history(tool_result_msg("c0", "out 0"), 20);
        mgr.push_history(tool_result_msg("c1", "out 1"), 20);
        assert_eq!(mgr.handles.all().len(), 2);

        // Simulate compaction archiving the oldest tool-result message out of history.
        mgr.partitions.history.messages.remove(0);
        mgr.prune_orphaned_handles();

        // The handle for the evicted message is gone; the live one is retained.
        assert_eq!(mgr.handles.all().len(), 1);
        assert!(mgr.handles.residency_for_source("c0").is_none());
        assert_eq!(
            mgr.handles.residency_for_source("c1"),
            Some(&Residency::Resident)
        );
    }

    #[test]
    fn autocompact_prunes_handles_for_archived_tool_results() {
        let mut mgr = ContextManager::new(1_000);
        // Enough oversized tool results to force AutoCompact to archive some.
        for i in 0..30 {
            mgr.push_history(tool_result_msg(&format!("c{i}"), &"x".repeat(200)), 80);
        }
        assert_eq!(mgr.handles.all().len(), 30);

        let (saved, _, archived, _) = mgr.compress(PressureAction::AutoCompact);
        assert!(saved > 0 && !archived.is_empty(), "expected archival");

        // After compaction the table tracks only the tool results still in working history —
        // not the whole session. (No handle outlives its backing message.)
        let live_tool_results = mgr
            .partitions
            .history
            .messages
            .iter()
            .filter(|m| matches!(&m.content, Content::Parts(p)
                if p.iter().any(|x| matches!(x, ContentPart::ToolResult { .. }))))
            .count();
        assert_eq!(mgr.handles.all().len(), live_tool_results);
        assert!(mgr.handles.all().len() < 30, "table must shrink with archival");
    }

    #[test]
    fn renew_prunes_handles_for_dropped_history() {
        let mut mgr = ContextManager::new(1_000);
        mgr.init_task("g".to_string(), vec![]);
        for i in 0..20 {
            mgr.push_history(tool_result_msg(&format!("c{i}"), "data"), 60);
        }
        mgr.renew();
        // Every retained handle must still be anchored to a message present in the renewed
        // history. The check runs against the history itself: looking the source up in the
        // handle table would succeed for any handle the table just returned, so it could
        // never detect a dangling handle.
        let live = live_tool_call_ids(&mgr);
        for h in mgr.handles.all() {
            if let Some(src) = h.source.as_ref() {
                let src: &str = src.as_ref();
                assert!(
                    live.contains(&src),
                    "no dangling handle survives renewal (source {src:?})"
                );
            }
        }
        assert!(mgr.handles.all().len() <= 20);
    }

    #[test]
    fn recompute_residency_index_semantics_with_spooled_in_the_middle() {
        // Locks the O(n)-rewrite's index/cutoff semantics against the old id+get_mut version:
        // a spooled handle still occupies an index position but is never toggled.
        let mut mgr = ContextManager::new(1_000);
        for i in 0..6 {
            mgr.push_history(tool_result_msg(&format!("c{i}"), &"y".repeat(40)), 40);
        }
        mgr.mark_spooled("c2", "disk://c2");

        mgr.set_observed_prompt_tokens(950); // rho >= collapse_threshold
        mgr.recompute_handle_residency();

        // Spooled stays spooled; the most recent preserve_recent_msgs stay resident; older collapse.
        assert_eq!(
            mgr.handles.residency_for_source("c2"),
            Some(&Residency::SpooledOut { r: "disk://c2".to_string() })
        );
        assert_eq!(mgr.handles.residency_for_source("c0"), Some(&Residency::Collapsed));
        assert_eq!(mgr.handles.residency_for_source("c5"), Some(&Residency::Resident));
    }
}