// zeph_context/slot.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Context slot types, compaction outcome, and message-chunking helpers.
5//!
6//! [`ContextSlot`] tags async fetch results so the assembler's `FuturesUnordered`
7//! collector can dispatch results without tuple indexing.
8//!
9//! [`CompactionOutcome`] communicates the result of one compaction attempt to
10//! `maybe_compact` in `zeph-core`.
11
12use zeph_llm::provider::Message;
13
14/// Tagged output of each concurrent context-fetch future.
15///
16/// Using an enum instead of a tuple allows individual sources to be added or
17/// removed (including cfg-gated ones) without rewriting the join combinator.
18pub enum ContextSlot {
19 /// Past-session summaries (contextual recall).
20 Summaries(Option<Message>),
21 /// Cross-session memory recall.
22 CrossSession(Option<Message>),
23 /// Semantic recall result. Carries the formatted message and the top-1 similarity score.
24 SemanticRecall(Option<Message>, Option<f32>),
25 /// Document RAG result.
26 DocumentRag(Option<Message>),
27 /// Past user corrections recalled for this turn.
28 Corrections(Option<Message>),
29 /// Code-index RAG result (repo-map or file context).
30 CodeContext(Option<String>),
31 /// Knowledge graph fact recall.
32 GraphFacts(Option<Message>),
33 /// Persona memory facts injected after the system prompt (#2461).
34 PersonaFacts(Option<Message>),
35 /// Top-k procedural trajectory hints recalled for the current turn (#2498).
36 TrajectoryHints(Option<Message>),
37 /// `TiMem` tree summary nodes recalled for context (#2262).
38 TreeMemory(Option<Message>),
39}
40
/// Return type from `compact_context()` that distinguishes between successful compaction,
/// probe rejection, and no-op.
///
/// Gives `maybe_compact()` enough information to handle probe rejection without triggering
/// the `Exhausted` state — which would only be correct if summarization itself is stuck.
///
/// `Copy` + `Eq` so callers can pass it by value and compare against variants directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionOutcome {
    /// Messages were drained and replaced with a summary.
    Compacted,
    /// Probe rejected the summary — original messages are preserved.
    /// Caller must NOT check `freed_tokens` or transition to `Exhausted`.
    ProbeRejected,
    /// No compaction was performed (too few messages, empty `to_compact`, etc.).
    NoChange,
}
56
/// Prefix prepended to persona memory injections.
///
/// The trailing `\n` in each prefix separates the bracketed tag from the
/// injected content on the following line.
pub const PERSONA_PREFIX: &str = "[Persona context]\n";
/// Prefix prepended to trajectory-hint injections.
pub const TRAJECTORY_PREFIX: &str = "[Past experience]\n";
/// Prefix prepended to `TiMem` tree memory injections.
pub const TREE_MEMORY_PREFIX: &str = "[Memory summary]\n";
63
64/// Split a message slice into chunks that each fit within `budget` tokens.
65///
66/// Messages larger than `oversized` tokens each get their own chunk. All other
67/// messages are greedily packed. Callers that need at least one chunk will always
68/// receive one (empty `Vec<Message>` wrapped in a single chunk).
69#[must_use]
70pub fn chunk_messages(
71 messages: &[Message],
72 budget: usize,
73 oversized: usize,
74 tc: &zeph_memory::TokenCounter,
75) -> Vec<Vec<Message>> {
76 let mut chunks: Vec<Vec<Message>> = Vec::new();
77 let mut current: Vec<Message> = Vec::new();
78 let mut current_tokens = 0usize;
79
80 for msg in messages {
81 let msg_tokens = tc.count_message_tokens(msg);
82
83 if msg_tokens >= oversized {
84 if !current.is_empty() {
85 chunks.push(std::mem::take(&mut current));
86 current_tokens = 0;
87 }
88 chunks.push(vec![msg.clone()]);
89 } else if current_tokens + msg_tokens > budget && !current.is_empty() {
90 chunks.push(std::mem::take(&mut current));
91 current_tokens = 0;
92 current.push(msg.clone());
93 current_tokens += msg_tokens;
94 } else {
95 current.push(msg.clone());
96 current_tokens += msg_tokens;
97 }
98 }
99
100 if !current.is_empty() {
101 chunks.push(current);
102 }
103
104 if chunks.is_empty() {
105 chunks.push(Vec::new());
106 }
107
108 chunks
109}
110
111/// Cap an LLM summary to `max_chars` characters (SEC-02).
112///
113/// Prevents a misbehaving LLM backend from returning an arbitrarily large summary that
114/// would expand rather than shrink the context window after compaction.
115#[must_use]
116pub fn cap_summary(s: String, max_chars: usize) -> String {
117 match s.char_indices().nth(max_chars) {
118 Some((byte_idx, _)) => {
119 tracing::warn!(
120 original_chars = s.chars().count(),
121 cap = max_chars,
122 "LLM summary exceeded cap, truncating"
123 );
124 format!("{}…", &s[..byte_idx])
125 }
126 None => s,
127 }
128}