llm/
context.rs

1use serde::{Deserialize, Serialize};
2
3use crate::catalog::LlmModel;
4use crate::chat_message::AssistantReasoning;
5use crate::reasoning::ReasoningEffort;
6use crate::types::IsoString;
7
8use super::{ChatMessage, ToolDefinition};
9
#[doc = include_str!("docs/context.md")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Context {
    /// Ordered message history held by this context.
    messages: Vec<ChatMessage>,
    /// Tool definitions carried alongside the messages.
    tools: Vec<ToolDefinition>,
    /// Optional reasoning effort setting. Marked `#[serde(skip)]`, so it is not part of
    /// the serialized form.
    #[serde(skip)]
    reasoning_effort: Option<ReasoningEffort>,
    /// Optional prompt cache key. Marked `#[serde(skip)]`, so it is not part of the
    /// serialized form.
    #[serde(skip)]
    prompt_cache_key: Option<String>,
}
20
21impl Context {
22    pub fn new(messages: Vec<ChatMessage>, tools: Vec<ToolDefinition>) -> Self {
23        Self { messages, tools, reasoning_effort: None, prompt_cache_key: None }
24    }
25
26    pub fn prompt_cache_key(&self) -> Option<&str> {
27        self.prompt_cache_key.as_deref()
28    }
29
30    pub fn set_prompt_cache_key(&mut self, key: Option<String>) {
31        self.prompt_cache_key = key;
32    }
33
34    pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
35        self.reasoning_effort
36    }
37
38    pub fn set_reasoning_effort(&mut self, effort: Option<ReasoningEffort>) {
39        self.reasoning_effort = effort;
40    }
41
42    pub fn add_message(&mut self, message: ChatMessage) {
43        self.messages.push(message);
44    }
45
46    pub fn set_tools(&mut self, tools: Vec<ToolDefinition>) {
47        self.tools = tools;
48    }
49
50    pub fn set_system_content(&mut self, content: String) {
51        if let Some(ChatMessage::System { content: existing, .. }) = self.messages.first_mut() {
52            *existing = content;
53        } else {
54            self.messages.insert(0, ChatMessage::System { content, timestamp: IsoString::now() });
55        }
56    }
57
58    pub fn messages(&self) -> &Vec<ChatMessage> {
59        &self.messages
60    }
61
62    pub fn tools(&self) -> &Vec<ToolDefinition> {
63        &self.tools
64    }
65
66    /// Returns the number of messages in the context
67    pub fn message_count(&self) -> usize {
68        self.messages.len()
69    }
70
71    /// Estimate total token count using the ~4 bytes/token heuristic.
72    /// Includes messages and tool definitions. Used for pre-flight overflow detection.
73    pub fn estimated_token_count(&self) -> u32 {
74        let message_bytes: usize = self.messages.iter().map(ChatMessage::estimated_bytes).sum();
75        let tool_bytes: usize =
76            self.tools.iter().map(|t| t.name.len() + t.description.len() + t.parameters.len()).sum();
77        let total_bytes = message_bytes + tool_bytes;
78        u32::try_from(total_bytes / 4).unwrap_or(u32::MAX)
79    }
80
81    /// Build an assistant turn and its tool call results and append them to messages.
82    pub fn push_assistant_turn(
83        &mut self,
84        content: &str,
85        reasoning: AssistantReasoning,
86        completed_tools: Vec<Result<super::ToolCallResult, super::ToolCallError>>,
87    ) {
88        let tool_requests: Vec<_> = completed_tools
89            .iter()
90            .map(|result| match result {
91                Ok(r) => {
92                    super::ToolCallRequest { id: r.id.clone(), name: r.name.clone(), arguments: r.arguments.clone() }
93                }
94                Err(e) => super::ToolCallRequest {
95                    id: e.id.clone(),
96                    name: e.name.clone(),
97                    arguments: e.arguments.clone().unwrap_or_default(),
98                },
99            })
100            .collect();
101
102        self.messages.push(ChatMessage::Assistant {
103            content: content.to_string(),
104            reasoning,
105            timestamp: IsoString::now(),
106            tool_calls: tool_requests,
107        });
108
109        for result in completed_tools {
110            self.messages.push(ChatMessage::ToolCallResult(result));
111        }
112    }
113
114    /// Return a copy with encrypted reasoning filtered for the given model.
115    /// Encrypted content is kept only when its source model matches.
116    pub fn filter_encrypted_reasoning(&self, model: &LlmModel) -> Self {
117        let messages = self
118            .messages
119            .iter()
120            .map(|msg| match msg {
121                ChatMessage::Assistant { content, reasoning, timestamp, tool_calls } => ChatMessage::Assistant {
122                    content: content.clone(),
123                    reasoning: AssistantReasoning {
124                        summary_text: reasoning.summary_text.clone(),
125                        encrypted_content: reasoning
126                            .encrypted_content
127                            .as_ref()
128                            .filter(|ec| &ec.model == model)
129                            .cloned(),
130                    },
131                    timestamp: timestamp.clone(),
132                    tool_calls: tool_calls.clone(),
133                },
134                other => other.clone(),
135            })
136            .collect();
137        Context {
138            messages,
139            tools: self.tools.clone(),
140            reasoning_effort: self.reasoning_effort,
141            prompt_cache_key: self.prompt_cache_key.clone(),
142        }
143    }
144
145    /// Clear all non-system messages, retaining only system prompts.
146    pub fn clear_conversation(&mut self) {
147        self.messages.retain(super::chat_message::ChatMessage::is_system);
148    }
149
150    /// Replace all non-system messages while preserving the system prompt and runtime state.
151    pub fn replace_conversation(&mut self, messages: Vec<ChatMessage>) {
152        self.messages = self
153            .messages
154            .drain(..)
155            .filter(ChatMessage::is_system)
156            .chain(messages.into_iter().filter(|m| !m.is_system()))
157            .collect();
158    }
159
160    /// Get all non-system messages for summarization
161    pub fn messages_for_summary(&self) -> Vec<&ChatMessage> {
162        self.messages.iter().filter(|msg| !msg.is_system()).collect()
163    }
164
165    /// Create a new context with all messages replaced by a summary.
166    /// Preserves the system prompt and tools.
167    pub fn with_compacted_summary(&self, summary: &str) -> Context {
168        let system_messages: Vec<_> = self.messages.iter().filter(|msg| msg.is_system()).cloned().collect();
169
170        let non_system_count = self.messages.len() - system_messages.len();
171
172        let mut messages = system_messages;
173        if non_system_count > 0 {
174            messages.push(ChatMessage::Summary {
175                content: summary.to_string(),
176                timestamp: IsoString::now(),
177                messages_compacted: non_system_count,
178            });
179        }
180
181        Context {
182            messages,
183            tools: self.tools.clone(),
184            reasoning_effort: self.reasoning_effort,
185            prompt_cache_key: self.prompt_cache_key.clone(),
186        }
187    }
188}
189
190#[cfg(test)]
191mod tests {
192    use super::*;
193    use crate::ContentBlock;
194    use crate::ToolCallResult;
195    use crate::catalog::LlmModel;
196
197    fn create_test_context() -> Context {
198        let messages = vec![
199            ChatMessage::System { content: "You are a helpful assistant.".to_string(), timestamp: IsoString::now() },
200            ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() },
201            ChatMessage::Assistant {
202                content: "Hi there!".to_string(),
203                reasoning: AssistantReasoning::default(),
204                timestamp: IsoString::now(),
205                tool_calls: vec![],
206            },
207            ChatMessage::ToolCallResult(Ok(ToolCallResult {
208                id: "1".to_string(),
209                name: "tool1".to_string(),
210                arguments: "{}".to_string(),
211                result: "Result 1".to_string(),
212            })),
213            ChatMessage::ToolCallResult(Ok(ToolCallResult {
214                id: "2".to_string(),
215                name: "tool2".to_string(),
216                arguments: "{}".to_string(),
217                result: "Result 2".to_string(),
218            })),
219            ChatMessage::ToolCallResult(Ok(ToolCallResult {
220                id: "3".to_string(),
221                name: "tool3".to_string(),
222                arguments: "{}".to_string(),
223                result: "Result 3".to_string(),
224            })),
225        ];
226        Context::new(messages, vec![])
227    }
228
229    #[test]
230    fn replace_conversation_preserves_system_message() {
231        let mut ctx = create_test_context();
232        ctx.replace_conversation(vec![ChatMessage::User {
233            content: vec![ContentBlock::text("new")],
234            timestamp: IsoString::now(),
235        }]);
236
237        assert_eq!(ctx.message_count(), 2);
238        assert!(ctx.messages()[0].is_system());
239        assert!(matches!(ctx.messages()[1], ChatMessage::User { .. }));
240    }
241
242    #[test]
243    fn replace_conversation_replaces_old_non_system_messages() {
244        let mut ctx = create_test_context();
245        ctx.replace_conversation(vec![ChatMessage::Assistant {
246            content: "replacement".to_string(),
247            reasoning: AssistantReasoning::default(),
248            timestamp: IsoString::now(),
249            tool_calls: vec![],
250        }]);
251
252        assert_eq!(ctx.message_count(), 2);
253        assert!(
254            ctx.messages()
255                .iter()
256                .all(|message| { !matches!(message, ChatMessage::User { .. } | ChatMessage::ToolCallResult(_)) })
257        );
258        assert!(matches!(ctx.messages()[1], ChatMessage::Assistant { ref content, .. } if content == "replacement"));
259    }
260
261    #[test]
262    fn replace_conversation_filters_incoming_system_messages() {
263        let mut ctx = create_test_context();
264        ctx.replace_conversation(vec![
265            ChatMessage::System { content: "wrong system".to_string(), timestamp: IsoString::now() },
266            ChatMessage::User { content: vec![ContentBlock::text("kept")], timestamp: IsoString::now() },
267        ]);
268
269        assert_eq!(ctx.message_count(), 2);
270        assert!(
271            matches!(ctx.messages()[0], ChatMessage::System { ref content, .. } if content == "You are a helpful assistant.")
272        );
273        assert!(matches!(ctx.messages()[1], ChatMessage::User { .. }));
274    }
275
276    #[test]
277    fn replace_conversation_does_not_change_tools() {
278        let tool = ToolDefinition::new("read_file", "Reads a file", "{}");
279        let mut ctx = Context::new(
280            vec![ChatMessage::System { content: "system".to_string(), timestamp: IsoString::now() }],
281            vec![tool.clone()],
282        );
283        ctx.replace_conversation(vec![ChatMessage::User {
284            content: vec![ContentBlock::text("new")],
285            timestamp: IsoString::now(),
286        }]);
287
288        assert_eq!(ctx.tools(), &vec![tool]);
289    }
290
291    #[test]
292    fn test_message_count() {
293        let ctx = create_test_context();
294        assert_eq!(ctx.message_count(), 6);
295    }
296
297    #[test]
298    fn test_with_compacted_summary_preserves_system_prompt() {
299        let ctx = create_test_context();
300        let compacted = ctx.with_compacted_summary("This is a summary of previous conversation.");
301
302        assert_eq!(compacted.message_count(), 2);
303        assert!(compacted.messages()[0].is_system());
304        assert!(compacted.messages()[1].is_summary());
305    }
306
307    #[test]
308    fn test_with_compacted_summary_empty_context() {
309        let ctx = Context::new(
310            vec![ChatMessage::System { content: "System".to_string(), timestamp: IsoString::now() }],
311            vec![],
312        );
313        let compacted = ctx.with_compacted_summary("Summary");
314
315        assert_eq!(compacted.message_count(), 1);
316    }
317
318    #[test]
319    fn test_messages_for_summary() {
320        let ctx = create_test_context();
321        let msgs = ctx.messages_for_summary();
322
323        assert_eq!(msgs.len(), 5);
324        assert!(msgs.iter().all(|m| !m.is_system()));
325    }
326
327    #[test]
328    fn test_prompt_cache_key_default_is_none() {
329        let ctx = create_test_context();
330        assert_eq!(ctx.prompt_cache_key(), None);
331    }
332
333    #[test]
334    fn test_prompt_cache_key_set_and_get() {
335        let mut ctx = create_test_context();
336        ctx.set_prompt_cache_key(Some("session-123".to_string()));
337        assert_eq!(ctx.prompt_cache_key(), Some("session-123"));
338
339        ctx.set_prompt_cache_key(None);
340        assert_eq!(ctx.prompt_cache_key(), None);
341    }
342
343    #[test]
344    fn test_prompt_cache_key_preserved_through_compaction() {
345        let mut ctx = create_test_context();
346        ctx.set_prompt_cache_key(Some("session-abc".to_string()));
347        let compacted = ctx.with_compacted_summary("Summary");
348        assert_eq!(compacted.prompt_cache_key(), Some("session-abc"));
349    }
350
351    #[test]
352    fn test_prompt_cache_key_preserved_through_projection() {
353        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
354        let mut ctx = Context::new(
355            vec![ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() }],
356            vec![],
357        );
358        ctx.set_prompt_cache_key(Some("session-xyz".to_string()));
359        let projected = ctx.filter_encrypted_reasoning(&model);
360        assert_eq!(projected.prompt_cache_key(), Some("session-xyz"));
361    }
362
363    #[test]
364    fn test_reasoning_effort_default_is_none() {
365        let ctx = create_test_context();
366        assert_eq!(ctx.reasoning_effort(), None);
367    }
368
369    #[test]
370    fn test_reasoning_effort_set_and_get() {
371        let mut ctx = create_test_context();
372        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::High));
373        assert_eq!(ctx.reasoning_effort(), Some(crate::ReasoningEffort::High));
374
375        ctx.set_reasoning_effort(None);
376        assert_eq!(ctx.reasoning_effort(), None);
377    }
378
379    #[test]
380    fn test_reasoning_effort_preserved_through_compaction() {
381        let mut ctx = create_test_context();
382        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::Medium));
383        let compacted = ctx.with_compacted_summary("Summary");
384        assert_eq!(compacted.reasoning_effort(), Some(crate::ReasoningEffort::Medium));
385    }
386
387    #[test]
388    fn test_estimated_token_count() {
389        use crate::ToolDefinition;
390
391        // "You are a helpful assistant." = 28 bytes
392        // "Hello" = 5 bytes
393        // "Hi there!" = 9 bytes (assistant, no reasoning, no tool calls)
394        // 3 tool results: "Result 1" (8) + "tool1" (5) + "{}" (2) = 15 each = 45 total
395        // Total message bytes = 28 + 5 + 9 + 45 = 87
396        let ctx = create_test_context();
397        let base_estimate = ctx.estimated_token_count();
398
399        // With no tools, estimate = message_bytes / 4
400        assert_eq!(base_estimate, 87 / 4);
401
402        let tool = ToolDefinition::new("read_file", "Reads a file", "{}");
403        let ctx_with_tools = Context::new(ctx.messages().clone(), vec![tool]);
404        let with_tools_estimate = ctx_with_tools.estimated_token_count();
405        assert_eq!(with_tools_estimate, (87 + 9 + 12 + 2) / 4);
406        assert!(with_tools_estimate > base_estimate);
407    }
408
409    #[test]
410    fn compaction_drops_encrypted_reasoning() {
411        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
412        let ctx = Context::new(
413            vec![
414                ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() },
415                ChatMessage::Assistant {
416                    content: "I see.".to_string(),
417                    reasoning: AssistantReasoning {
418                        summary_text: Some("thinking".to_string()),
419                        encrypted_content: Some(crate::EncryptedReasoningContent {
420                            id: "r_test".to_string(),
421                            model,
422                            content: "blob".to_string(),
423                        }),
424                    },
425                    timestamp: IsoString::now(),
426                    tool_calls: vec![],
427                },
428            ],
429            vec![],
430        );
431        let compacted = ctx.with_compacted_summary("Summary of conversation");
432
433        for msg in compacted.messages() {
434            if let ChatMessage::Assistant { reasoning, .. } = msg {
435                assert!(reasoning.encrypted_content.is_none(), "compaction should drop encrypted reasoning");
436            }
437        }
438    }
439
440    #[test]
441    fn projected_for_keeps_matching_model() {
442        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
443        let ctx = Context::new(
444            vec![ChatMessage::Assistant {
445                content: "reply".to_string(),
446                reasoning: AssistantReasoning {
447                    summary_text: Some("think".to_string()),
448                    encrypted_content: Some(crate::EncryptedReasoningContent {
449                        id: "r_test".to_string(),
450                        model: model.clone(),
451                        content: "blob".to_string(),
452                    }),
453                },
454                timestamp: IsoString::now(),
455                tool_calls: vec![],
456            }],
457            vec![],
458        );
459        let projected = ctx.filter_encrypted_reasoning(&model);
460        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
461            assert!(reasoning.encrypted_content.is_some());
462            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
463        } else {
464            panic!("expected assistant message");
465        }
466    }
467
468    #[test]
469    fn projected_for_strips_non_matching_model() {
470        let model_a: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
471        let model_b: LlmModel = "anthropic:claude-sonnet-4-5-20250929".parse().unwrap();
472        let ctx = Context::new(
473            vec![ChatMessage::Assistant {
474                content: "reply".to_string(),
475                reasoning: AssistantReasoning {
476                    summary_text: Some("think".to_string()),
477                    encrypted_content: Some(crate::EncryptedReasoningContent {
478                        id: "r_test".to_string(),
479                        model: model_a,
480                        content: "blob".to_string(),
481                    }),
482                },
483                timestamp: IsoString::now(),
484                tool_calls: vec![],
485            }],
486            vec![],
487        );
488        let projected = ctx.filter_encrypted_reasoning(&model_b);
489        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
490            assert!(reasoning.encrypted_content.is_none());
491            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
492        } else {
493            panic!("expected assistant message");
494        }
495    }
496}
llm/context.rs

llm/
context.rs