Skip to main content

arcan_core/
context.rs

1use crate::protocol::{ChatMessage, Role};
2
/// Settings that bound how much conversation history is sent to the model.
#[derive(Clone, Debug)]
pub struct ContextConfig {
    /// Estimated-token size of the whole context window.
    /// Compaction is triggered once the input no longer fits the budget
    /// derived from this value.
    pub max_context_tokens: usize,
    /// Tokens set aside for the model's reply.
    /// The input budget is `max_context_tokens - reserve_output_tokens`.
    pub reserve_output_tokens: usize,
}
13
14impl Default for ContextConfig {
15    fn default() -> Self {
16        Self {
17            max_context_tokens: 200_000,
18            reserve_output_tokens: 8_192,
19        }
20    }
21}
22
23impl ContextConfig {
24    /// Context budget available for input messages.
25    pub fn input_budget(&self) -> usize {
26        self.max_context_tokens
27            .saturating_sub(self.reserve_output_tokens)
28    }
29}
30
31/// Result of a context compaction operation.
32#[derive(Debug, Clone)]
33pub struct CompactionResult {
34    /// Messages after compaction.
35    pub messages: Vec<ChatMessage>,
36    /// Number of messages dropped.
37    pub dropped_count: usize,
38    /// Estimated tokens before compaction.
39    pub tokens_before: usize,
40    /// Estimated tokens after compaction.
41    pub tokens_after: usize,
42}
43
/// Approximate how many tokens a single message with this content costs.
///
/// The heuristic charges one token per 4 bytes of `text` (rounded up) plus a
/// flat 4 tokens for per-message framing such as the role and formatting.
/// Length is measured with `str::len`, i.e. UTF-8 bytes, so non-ASCII text is
/// charged more than its character count would suggest.
pub fn estimate_tokens(text: &str) -> usize {
    // Roughly 4 bytes per token for average English text.
    const BYTES_PER_TOKEN: usize = 4;
    // Flat per-message allowance for role/formatting.
    const FRAMING_OVERHEAD_TOKENS: usize = 4;

    FRAMING_OVERHEAD_TOKENS + text.len().div_ceil(BYTES_PER_TOKEN)
}
55
56/// Estimate total tokens for a message list.
57pub fn estimate_total_tokens(messages: &[ChatMessage]) -> usize {
58    messages.iter().map(|m| estimate_tokens(&m.content)).sum()
59}
60
61/// Compact messages to fit within the context budget.
62///
63/// Strategy:
64/// 1. Always preserve system messages (they contain persona/instructions)
65/// 2. Always preserve the most recent user message (it's the current request)
66/// 3. Preserve tool results that are paired with tool calls still in context
67/// 4. Drop oldest non-system, non-final-user messages first
68///
69/// Returns `None` if no compaction was needed.
70pub fn compact_messages(
71    messages: &[ChatMessage],
72    config: &ContextConfig,
73) -> Option<CompactionResult> {
74    let budget = config.input_budget();
75    let tokens_before = estimate_total_tokens(messages);
76
77    if tokens_before <= budget {
78        return None;
79    }
80
81    // Separate messages into categories
82    let mut system_msgs: Vec<(usize, &ChatMessage)> = Vec::new();
83    let mut other_msgs: Vec<(usize, &ChatMessage)> = Vec::new();
84
85    for (i, msg) in messages.iter().enumerate() {
86        if msg.role == Role::System {
87            system_msgs.push((i, msg));
88        } else {
89            other_msgs.push((i, msg));
90        }
91    }
92
93    // System messages are always kept
94
95    // The last user message is always kept (it's the current request)
96    let last_user = other_msgs.iter().rposition(|(_i, m)| m.role == Role::User);
97
98    let mut keep_indices: Vec<usize> = system_msgs.iter().map(|(i, _)| *i).collect();
99
100    if let Some(last_user_pos) = last_user {
101        keep_indices.push(other_msgs[last_user_pos].0);
102    }
103
104    // Budget remaining after system messages and last user message
105    let fixed_tokens: usize = keep_indices
106        .iter()
107        .map(|&i| estimate_tokens(&messages[i].content))
108        .sum();
109
110    let mut remaining_budget = budget.saturating_sub(fixed_tokens);
111
112    // Add non-fixed messages from most recent to oldest (recency bias)
113    let mut candidate_indices: Vec<usize> = other_msgs
114        .iter()
115        .map(|(i, _)| *i)
116        .filter(|i| !keep_indices.contains(i))
117        .collect();
118
119    // Reverse to process most recent first
120    candidate_indices.reverse();
121
122    let mut accepted: Vec<usize> = Vec::new();
123    for idx in &candidate_indices {
124        let msg_tokens = estimate_tokens(&messages[*idx].content);
125        if msg_tokens <= remaining_budget {
126            accepted.push(*idx);
127            remaining_budget = remaining_budget.saturating_sub(msg_tokens);
128        }
129        // If a message doesn't fit, skip it (drop from context)
130    }
131
132    // Combine kept indices and sort by original position
133    keep_indices.extend(accepted);
134    keep_indices.sort_unstable();
135    keep_indices.dedup();
136
137    let dropped_count = messages.len() - keep_indices.len();
138    if dropped_count == 0 {
139        return None;
140    }
141
142    let compacted: Vec<ChatMessage> = keep_indices.iter().map(|&i| messages[i].clone()).collect();
143
144    let tokens_after = estimate_total_tokens(&compacted);
145
146    // Safety: if we somehow still exceeded budget with just system + last user,
147    // that's a fundamental limit we can't fix by dropping more messages.
148    // Just return what we have.
149
150    Some(CompactionResult {
151        messages: compacted,
152        dropped_count,
153        tokens_before,
154        tokens_after,
155    })
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use crate::protocol::ChatMessage;

    /// Build a message of the given role through the matching `ChatMessage` constructor.
    fn make_msg(role: Role, text: &str) -> ChatMessage {
        match role {
            Role::System => ChatMessage::system(text),
            Role::User => ChatMessage::user(text),
            Role::Assistant => ChatMessage::assistant(text),
            Role::Tool => ChatMessage::tool(text),
        }
    }

    /// Shorthand for a config with the given window size and output reserve.
    fn config_with(max_context_tokens: usize, reserve_output_tokens: usize) -> ContextConfig {
        ContextConfig {
            max_context_tokens,
            reserve_output_tokens,
        }
    }

    #[test]
    fn no_compaction_when_within_budget() {
        let history = [
            make_msg(Role::System, "You are an agent."),
            make_msg(Role::User, "Hello"),
            make_msg(Role::Assistant, "Hi there!"),
        ];

        let outcome = compact_messages(&history, &config_with(100_000, 4_096));
        assert!(outcome.is_none());
    }

    #[test]
    fn compaction_drops_oldest_messages() {
        // Fifty user/assistant turns with bulky replies blow past a small budget.
        let bulky_reply = "long response text ".repeat(100);
        let mut history = vec![make_msg(Role::System, "sys")];
        for turn in 0..50 {
            history.push(make_msg(Role::User, &format!("user message {turn}")));
            history.push(make_msg(Role::Assistant, &bulky_reply));
        }
        // The request currently being answered.
        history.push(make_msg(Role::User, "current question"));

        let config = config_with(2_000, 500);
        let result = compact_messages(&history, &config).expect("should compact");

        assert!(result.dropped_count > 0);
        assert!(result.tokens_after <= config.input_budget());

        // The system prompt stays at the front.
        let first = &result.messages[0];
        assert_eq!(first.role, Role::System);
        assert_eq!(first.content, "sys");

        // The current request survives.
        let kept_current = result
            .messages
            .iter()
            .any(|m| m.content == "current question");
        assert!(kept_current);
    }

    #[test]
    fn system_messages_always_preserved() {
        let history = [
            make_msg(Role::System, "system prompt 1"),
            make_msg(Role::System, "system prompt 2"),
            make_msg(Role::User, &"long msg ".repeat(500)),
            make_msg(Role::Assistant, &"long reply ".repeat(500)),
            make_msg(Role::User, "current"),
        ];

        let result =
            compact_messages(&history, &config_with(500, 100)).expect("should compact");

        // Neither system prompt may be dropped.
        let mut surviving_system = 0;
        for msg in &result.messages {
            if msg.role == Role::System {
                surviving_system += 1;
            }
        }
        assert_eq!(surviving_system, 2);
    }

    #[test]
    fn last_user_message_always_preserved() {
        let history = [
            make_msg(Role::System, "sys"),
            make_msg(Role::User, &"old ".repeat(500)),
            make_msg(Role::Assistant, &"reply ".repeat(500)),
            make_msg(Role::User, "latest question"),
        ];

        let result =
            compact_messages(&history, &config_with(200, 50)).expect("should compact");

        let tail = result.messages.last().expect("should have messages");
        assert_eq!(tail.content, "latest question");
    }

    #[test]
    fn recency_bias_keeps_newer_messages() {
        // Twenty padded turns, each big enough that only a few fit.
        let mut history = vec![make_msg(Role::System, "sys")];
        for turn in 0..20 {
            let question = format!("question {turn} {}", "q".repeat(200));
            let answer = format!("answer {turn} {}", "x".repeat(200));
            history.push(make_msg(Role::User, &question));
            history.push(make_msg(Role::Assistant, &answer));
        }
        history.push(make_msg(Role::User, "final"));

        let result =
            compact_messages(&history, &config_with(1_000, 200)).expect("should compact");

        let survives = |needle: &str| result.messages.iter().any(|m| m.content.contains(needle));
        let has_recent = survives("answer 19");
        let has_old = survives("answer 0");

        // The newest exchange must still be present.
        assert!(has_recent, "Recent messages should be kept");
        // Whether the oldest one survives depends on the budget, but once
        // more than a couple of messages were dropped it must be gone.
        if result.dropped_count > 2 {
            assert!(!has_old, "Old messages should be dropped first");
        }
    }

    #[test]
    fn empty_messages_no_compaction() {
        let history: Vec<ChatMessage> = Vec::new();
        let outcome = compact_messages(&history, &ContextConfig::default());
        assert!(outcome.is_none());
    }

    #[test]
    fn single_user_message_no_compaction_if_within_budget() {
        let history = [make_msg(Role::User, "hello")];
        let outcome = compact_messages(&history, &ContextConfig::default());
        assert!(outcome.is_none());
    }

    #[test]
    fn estimate_tokens_reasonable() {
        // "hello" is 5 bytes: a couple of content tokens plus 4 of overhead.
        let hello = estimate_tokens("hello");
        assert!(
            hello >= 5,
            "Should have at least 5 tokens for 'hello' + overhead"
        );
        assert!(hello <= 10, "Should not be excessive");

        // Even empty content pays the framing overhead.
        let nothing = estimate_tokens("");
        assert!(nothing >= 4, "Should have overhead");

        // 1000 bytes: about 250 content tokens plus 4 of overhead.
        let thousand = estimate_tokens(&"a".repeat(1000));
        assert!(thousand >= 250);
        assert!(thousand <= 260);
    }

    #[test]
    fn default_config_reasonable() {
        let defaults = ContextConfig::default();
        assert_eq!(defaults.max_context_tokens, 200_000);
        assert_eq!(defaults.reserve_output_tokens, 8_192);
        assert!(defaults.input_budget() > 190_000);
    }

    #[test]
    fn compaction_result_reports_accurate_counts() {
        let padded_reply = "x".repeat(200);
        let mut history = vec![make_msg(Role::System, "sys")];
        for turn in 0..10 {
            history.push(make_msg(Role::User, &format!("q{turn}")));
            history.push(make_msg(Role::Assistant, &padded_reply));
        }
        history.push(make_msg(Role::User, "final"));

        let result =
            compact_messages(&history, &config_with(300, 50)).expect("should compact");

        // Kept plus dropped must account for every input message.
        assert_eq!(result.messages.len() + result.dropped_count, history.len());
        assert!(result.tokens_before > result.tokens_after);
    }

    #[test]
    fn tool_messages_can_be_dropped() {
        let history = [
            make_msg(Role::System, "sys"),
            make_msg(Role::User, "q1"),
            make_msg(Role::Assistant, "calling tool"),
            ChatMessage::tool_result("call-1", "x".repeat(500)),
            make_msg(Role::User, "current"),
        ];

        let result =
            compact_messages(&history, &config_with(100, 20)).expect("should compact");

        // The oversized tool result cannot fit, so something must be dropped
        // while the current request stays.
        assert!(result.dropped_count > 0);
        assert!(result.messages.iter().any(|m| m.content == "current"));
    }
}