zeph_context/slot.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context slot types, compaction outcome, and message-chunking helpers.
5//!
6//! [`ContextSlot`] tags async fetch results so the assembler's `FuturesUnordered`
7//! collector can dispatch results without tuple indexing.
8//!
9//! [`CompactionOutcome`] communicates the result of one compaction attempt to
10//! `maybe_compact` in `zeph-core`.
11
12use zeph_llm::provider::Message;
13
/// Tagged output of each concurrent context-fetch future.
///
/// Using an enum instead of a tuple allows individual sources to be added or
/// removed (including cfg-gated ones) without rewriting the join combinator.
///
/// Every variant wraps its payload in an `Option`. All sources carry a
/// [`Message`] except [`ContextSlot::CodeContext`], which carries a raw `String`.
// NOTE(review): `None` presumably means "this source had nothing to inject for the
// current turn" — confirm against the assembler that consumes these slots.
pub enum ContextSlot {
    /// Past-session summaries (contextual recall).
    Summaries(Option<Message>),
    /// Cross-session memory recall.
    CrossSession(Option<Message>),
    /// Semantic recall result.
    ///
    /// Field 0 is the formatted message; field 1 is the top-1 similarity score.
    SemanticRecall(Option<Message>, Option<f32>),
    /// Document RAG result.
    DocumentRag(Option<Message>),
    /// Past user corrections recalled for this turn.
    Corrections(Option<Message>),
    /// Code-index RAG result (repo-map or file context).
    ///
    /// Unlike the other variants, the payload is plain text rather than a [`Message`].
    CodeContext(Option<String>),
    /// Knowledge graph fact recall.
    GraphFacts(Option<Message>),
    /// Persona memory facts injected after the system prompt (#2461).
    PersonaFacts(Option<Message>),
    /// Top-k procedural trajectory hints recalled for the current turn (#2498).
    TrajectoryHints(Option<Message>),
    /// `TiMem` tree summary nodes recalled for context (#2262).
    TreeMemory(Option<Message>),
    /// Distilled reasoning strategies recalled for the current turn (#3343).
    ReasoningStrategies(Option<Message>),
}
42
/// Result of a single `compact_context()` call, distinguishing successful compaction,
/// probe rejection, and no-op.
///
/// Gives `maybe_compact()` enough information to handle probe rejection without triggering
/// the `Exhausted` state — which would only be correct if summarization itself is stuck.
///
/// The type is a field-less enum deriving `Copy` and `Eq`, so outcomes can be passed by
/// value and compared directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionOutcome {
    /// Messages were drained and replaced with a summary.
    Compacted,
    /// Probe rejected the summary — original messages are preserved.
    ///
    /// Caller must NOT check `freed_tokens` or transition to `Exhausted`.
    ProbeRejected,
    /// No compaction was performed (too few messages, empty `to_compact`, etc.).
    NoChange,
}
58
/// Prefix prepended to persona memory injections.
///
/// Ends with a newline, so the text that follows the prefix starts on its own line.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// Prefix prepended to trajectory-hint injections.
///
/// Ends with a newline, so the text that follows the prefix starts on its own line.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// Prefix prepended to reasoning-strategy injections.
///
/// Ends with a newline, so the text that follows the prefix starts on its own line.
pub const REASONING_PREFIX: &str = "[Reasoning Strategy]\n";
/// Prefix prepended to `TiMem` tree memory injections.
///
/// Ends with a newline, so the text that follows the prefix starts on its own line.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
67
68/// Split a message slice into chunks that each fit within `budget` tokens.
69///
70/// Messages larger than `oversized` tokens each get their own chunk. All other
71/// messages are greedily packed. Callers that need at least one chunk will always
72/// receive one (empty `Vec<Message>` wrapped in a single chunk).
73///
74/// `count_message_tokens` is a caller-supplied function that returns the token count
75/// for a single message. This avoids a direct dependency on `zeph-memory::TokenCounter`.
76#[must_use]
77pub fn chunk_messages(
78 messages: &[Message],
79 budget: usize,
80 oversized: usize,
81 count_message_tokens: impl Fn(&Message) -> usize,
82) -> Vec<Vec<Message>> {
83 let mut chunks: Vec<Vec<Message>> = Vec::new();
84 let mut current: Vec<Message> = Vec::new();
85 let mut current_tokens = 0usize;
86
87 for msg in messages {
88 let msg_tokens = count_message_tokens(msg);
89
90 if msg_tokens >= oversized {
91 if !current.is_empty() {
92 chunks.push(std::mem::take(&mut current));
93 current_tokens = 0;
94 }
95 chunks.push(vec![msg.clone()]);
96 } else if current_tokens + msg_tokens > budget && !current.is_empty() {
97 chunks.push(std::mem::take(&mut current));
98 current_tokens = 0;
99 current.push(msg.clone());
100 current_tokens += msg_tokens;
101 } else {
102 current.push(msg.clone());
103 current_tokens += msg_tokens;
104 }
105 }
106
107 if !current.is_empty() {
108 chunks.push(current);
109 }
110
111 if chunks.is_empty() {
112 chunks.push(Vec::new());
113 }
114
115 chunks
116}
117
118/// Cap an LLM summary to `max_chars` characters (SEC-02).
119///
120/// Prevents a misbehaving LLM backend from returning an arbitrarily large summary that
121/// would expand rather than shrink the context window after compaction.
122#[must_use]
123pub fn cap_summary(s: String, max_chars: usize) -> String {
124 match s.char_indices().nth(max_chars) {
125 Some((byte_idx, _)) => {
126 tracing::warn!(
127 original_chars = s.chars().count(),
128 cap = max_chars,
129 "LLM summary exceeded cap, truncating"
130 );
131 format!("{}…", &s[..byte_idx])
132 }
133 None => s,
134 }
135}