Skip to main content

zeph_context/
slot.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context slot types, compaction outcome, and message-chunking helpers.
5//!
6//! [`ContextSlot`] tags async fetch results so the assembler's `FuturesUnordered`
7//! collector can dispatch results without tuple indexing.
8//!
9//! [`CompactionOutcome`] communicates the result of one compaction attempt to
10//! `maybe_compact` in `zeph-core`.
11
12use zeph_llm::provider::Message;
13
/// Tagged output of each concurrent context-fetch future.
///
/// Using an enum instead of a tuple allows individual sources to be added or
/// removed (including cfg-gated ones) without rewriting the join combinator.
///
/// Every variant wraps its primary payload in an `Option`.
/// NOTE(review): `None` presumably means the source produced nothing for this
/// turn — confirm against the assembler that consumes these values.
///
/// The enum is `#[non_exhaustive]`, so `match` expressions in other crates must
/// include a wildcard arm; new variants can then be added without a breaking change.
#[non_exhaustive]
pub enum ContextSlot {
    /// Past-session summaries (contextual recall).
    Summaries(Option<Message>),
    /// Cross-session memory recall.
    CrossSession(Option<Message>),
    /// Semantic recall result.
    ///
    /// The first field is the formatted message; the second is the top-1
    /// similarity score, which is optional independently of the message.
    SemanticRecall(Option<Message>, Option<f32>),
    /// Document RAG result.
    DocumentRag(Option<Message>),
    /// Past user corrections recalled for this turn.
    Corrections(Option<Message>),
    /// Code-index RAG result (repo-map or file context).
    ///
    /// Unlike the other variants this one carries a plain `String` rather than a
    /// [`Message`].
    CodeContext(Option<String>),
    /// Knowledge graph fact recall.
    GraphFacts(Option<Message>),
    /// Persona memory facts injected after the system prompt (#2461).
    PersonaFacts(Option<Message>),
    /// Top-k procedural trajectory hints recalled for the current turn (#2498).
    TrajectoryHints(Option<Message>),
    /// `TiMem` tree summary nodes recalled for context (#2262).
    TreeMemory(Option<Message>),
    /// Distilled reasoning strategies recalled for the current turn (#3343).
    ///
    /// The second field carries the `JoinHandle` for the background `mark_reasoning_used` task
    /// spawned after injection. Callers must store it in `PreparedContext::background_tasks`.
    ReasoningStrategies(Option<Message>, Option<tokio::task::JoinHandle<()>>),
}
46
/// Return type from `compact_context()` that distinguishes between successful compaction,
/// probe rejection, and no-op.
///
/// Gives `maybe_compact()` enough information to handle probe rejection without triggering
/// the `Exhausted` state — which would only be correct if summarization itself is stuck.
///
/// The type is a field-less `Copy` enum with `PartialEq`/`Eq`, so outcomes can be passed
/// by value and compared directly. It is `#[non_exhaustive]`: `match` expressions in other
/// crates need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionOutcome {
    /// Messages were drained and replaced with a summary.
    Compacted,
    /// Probe rejected the summary — original messages are preserved.
    ///
    /// Caller must NOT check `freed_tokens` or transition to `Exhausted`
    /// (both are caller-side names that are not defined in this module).
    ProbeRejected,
    /// No compaction was performed (too few messages, empty `to_compact`, etc.).
    NoChange,
}
63
/// Prefix prepended to persona memory injections.
///
/// Ends with a newline, so the injected content starts on its own line.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// Prefix prepended to trajectory-hint injections.
///
/// Ends with a newline, so the injected content starts on its own line.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// Prefix prepended to reasoning-strategy injections.
///
/// Ends with a newline, so the injected content starts on its own line.
pub const REASONING_PREFIX: &str = "[Reasoning Strategy]\n";
/// Prefix prepended to `TiMem` tree memory injections.
///
/// Ends with a newline, so the injected content starts on its own line.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
72
73/// Split a message slice into chunks that each fit within `budget` tokens.
74///
75/// Messages larger than `oversized` tokens each get their own chunk. All other
76/// messages are greedily packed. Callers that need at least one chunk will always
77/// receive one (empty `Vec<Message>` wrapped in a single chunk).
78///
79/// `count_message_tokens` is a caller-supplied function that returns the token count
80/// for a single message. This avoids a direct dependency on `zeph-memory::TokenCounter`.
81#[must_use]
82pub fn chunk_messages(
83    messages: &[Message],
84    budget: usize,
85    oversized: usize,
86    count_message_tokens: impl Fn(&Message) -> usize,
87) -> Vec<Vec<Message>> {
88    let mut chunks: Vec<Vec<Message>> = Vec::new();
89    let mut current: Vec<Message> = Vec::new();
90    let mut current_tokens = 0usize;
91
92    for msg in messages {
93        let msg_tokens = count_message_tokens(msg);
94
95        if msg_tokens >= oversized {
96            if !current.is_empty() {
97                chunks.push(std::mem::take(&mut current));
98                current_tokens = 0;
99            }
100            chunks.push(vec![msg.clone()]);
101        } else if current_tokens + msg_tokens > budget && !current.is_empty() {
102            chunks.push(std::mem::take(&mut current));
103            current_tokens = 0;
104            current.push(msg.clone());
105            current_tokens += msg_tokens;
106        } else {
107            current.push(msg.clone());
108            current_tokens += msg_tokens;
109        }
110    }
111
112    if !current.is_empty() {
113        chunks.push(current);
114    }
115
116    if chunks.is_empty() {
117        chunks.push(Vec::new());
118    }
119
120    chunks
121}
122
123/// Cap an LLM summary to `max_chars` characters (SEC-02).
124///
125/// Prevents a misbehaving LLM backend from returning an arbitrarily large summary that
126/// would expand rather than shrink the context window after compaction.
127#[must_use]
128pub fn cap_summary(s: String, max_chars: usize) -> String {
129    match s.char_indices().nth(max_chars) {
130        Some((byte_idx, _)) => {
131            tracing::warn!(
132                original_chars = s.chars().count(),
133                cap = max_chars,
134                "LLM summary exceeded cap, truncating"
135            );
136            format!("{}…", &s[..byte_idx])
137        }
138        None => s,
139    }
140}