// llm/context.rs

1use serde::{Deserialize, Serialize};
2
3use crate::catalog::LlmModel;
4use crate::chat_message::AssistantReasoning;
5use crate::reasoning::ReasoningEffort;
6use crate::types::IsoString;
7
8use super::{ChatMessage, ToolDefinition};
9
#[doc = include_str!("docs/context.md")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Context {
    /// Chat messages in order. System messages are distinguished from the rest
    /// of the conversation via `ChatMessage::is_system`.
    messages: Vec<ChatMessage>,
    /// Tool definitions carried alongside the messages.
    tools: Vec<ToolDefinition>,
    /// Optional reasoning effort. Runtime-only: `#[serde(skip)]` keeps it out of
    /// serialized output, and serde fills it with the `Default` (`None`) when
    /// deserializing.
    #[serde(skip)]
    reasoning_effort: Option<ReasoningEffort>,
    /// Optional prompt cache key. Runtime-only: `#[serde(skip)]` keeps it out of
    /// serialized output, and serde fills it with the `Default` (`None`) when
    /// deserializing.
    #[serde(skip)]
    prompt_cache_key: Option<String>,
}
20
21impl Context {
22    pub fn new(messages: Vec<ChatMessage>, tools: Vec<ToolDefinition>) -> Self {
23        Self { messages, tools, reasoning_effort: None, prompt_cache_key: None }
24    }
25
26    pub fn prompt_cache_key(&self) -> Option<&str> {
27        self.prompt_cache_key.as_deref()
28    }
29
30    pub fn set_prompt_cache_key(&mut self, key: Option<String>) {
31        self.prompt_cache_key = key;
32    }
33
34    pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
35        self.reasoning_effort
36    }
37
38    pub fn set_reasoning_effort(&mut self, effort: Option<ReasoningEffort>) {
39        self.reasoning_effort = effort;
40    }
41
42    pub fn add_message(&mut self, message: ChatMessage) {
43        self.messages.push(message);
44    }
45
46    pub fn set_tools(&mut self, tools: Vec<ToolDefinition>) {
47        self.tools = tools;
48    }
49
50    pub fn set_system_content(&mut self, content: String) {
51        if let Some(ChatMessage::System { content: existing, .. }) = self.messages.first_mut() {
52            *existing = content;
53        } else {
54            self.messages.insert(0, ChatMessage::System { content, timestamp: IsoString::now() });
55        }
56    }
57
58    pub fn messages(&self) -> &Vec<ChatMessage> {
59        &self.messages
60    }
61
62    pub fn tools(&self) -> &Vec<ToolDefinition> {
63        &self.tools
64    }
65
66    /// Returns the number of messages in the context
67    pub fn message_count(&self) -> usize {
68        self.messages.len()
69    }
70
71    /// Estimate total token count using the ~4 bytes/token heuristic.
72    /// Includes messages and tool definitions. Used for pre-flight overflow detection.
73    pub fn estimated_token_count(&self) -> u32 {
74        let message_bytes: usize = self.messages.iter().map(ChatMessage::estimated_bytes).sum();
75        let tool_bytes: usize =
76            self.tools.iter().map(|t| t.name.len() + t.description.len() + t.parameters.len()).sum();
77        let total_bytes = message_bytes + tool_bytes;
78        u32::try_from(total_bytes / 4).unwrap_or(u32::MAX)
79    }
80
81    /// Build an assistant turn and its tool call results and append them to messages.
82    pub fn push_assistant_turn(
83        &mut self,
84        content: &str,
85        reasoning: AssistantReasoning,
86        completed_tools: Vec<Result<super::ToolCallResult, super::ToolCallError>>,
87    ) {
88        let tool_requests: Vec<_> = completed_tools
89            .iter()
90            .map(|result| match result {
91                Ok(r) => {
92                    super::ToolCallRequest { id: r.id.clone(), name: r.name.clone(), arguments: r.arguments.clone() }
93                }
94                Err(e) => super::ToolCallRequest {
95                    id: e.id.clone(),
96                    name: e.name.clone(),
97                    arguments: e.arguments.clone().unwrap_or_default(),
98                },
99            })
100            .collect();
101
102        self.messages.push(ChatMessage::Assistant {
103            content: content.to_string(),
104            reasoning,
105            timestamp: IsoString::now(),
106            tool_calls: tool_requests,
107        });
108
109        for result in completed_tools {
110            self.messages.push(ChatMessage::ToolCallResult(result));
111        }
112    }
113
114    /// Return a copy with encrypted reasoning filtered for the given model.
115    /// Encrypted content is kept only when its source model matches.
116    pub fn filter_encrypted_reasoning(&self, model: &LlmModel) -> Self {
117        let messages = self
118            .messages
119            .iter()
120            .map(|msg| match msg {
121                ChatMessage::Assistant { content, reasoning, timestamp, tool_calls } => ChatMessage::Assistant {
122                    content: content.clone(),
123                    reasoning: AssistantReasoning {
124                        summary_text: reasoning.summary_text.clone(),
125                        encrypted_content: reasoning
126                            .encrypted_content
127                            .as_ref()
128                            .filter(|ec| &ec.model == model)
129                            .cloned(),
130                    },
131                    timestamp: timestamp.clone(),
132                    tool_calls: tool_calls.clone(),
133                },
134                other => other.clone(),
135            })
136            .collect();
137        Context {
138            messages,
139            tools: self.tools.clone(),
140            reasoning_effort: self.reasoning_effort,
141            prompt_cache_key: self.prompt_cache_key.clone(),
142        }
143    }
144
145    /// Clear all non-system messages, retaining only system prompts.
146    pub fn clear_conversation(&mut self) {
147        self.messages.retain(super::chat_message::ChatMessage::is_system);
148    }
149
150    /// Replace all non-system messages while preserving the system prompt and runtime state.
151    pub fn replace_conversation(&mut self, messages: Vec<ChatMessage>) {
152        self.messages = self
153            .messages
154            .drain(..)
155            .filter(ChatMessage::is_system)
156            .chain(messages.into_iter().filter(|m| !m.is_system()))
157            .collect();
158    }
159
160    /// Get all non-system messages for summarization
161    pub fn messages_for_summary(&self) -> Vec<&ChatMessage> {
162        self.messages.iter().filter(|msg| !msg.is_system()).collect()
163    }
164
165    /// Create a new context with all messages replaced by a summary.
166    /// Preserves the system prompt and tools.
167    pub fn with_compacted_summary(&self, summary: &str) -> Context {
168        let system_messages: Vec<_> = self.messages.iter().filter(|msg| msg.is_system()).cloned().collect();
169
170        let non_system_count = self.messages.len() - system_messages.len();
171
172        let mut messages = system_messages;
173        if non_system_count > 0 {
174            messages.push(ChatMessage::Summary {
175                content: summary.to_string(),
176                timestamp: IsoString::now(),
177                messages_compacted: non_system_count,
178            });
179        }
180
181        Context {
182            messages,
183            tools: self.tools.clone(),
184            reasoning_effort: self.reasoning_effort,
185            prompt_cache_key: self.prompt_cache_key.clone(),
186        }
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ContentBlock;
    use crate::ToolCallResult;
    use crate::catalog::LlmModel;

    const OPUS: &str = "anthropic:claude-opus-4-6";
    const SONNET: &str = "anthropic:claude-sonnet-4-5-20250929";

    /// Parses a model identifier used by the fixtures.
    fn model(id: &str) -> LlmModel {
        id.parse().unwrap()
    }

    /// System message stamped with the current time.
    fn system_msg(text: &str) -> ChatMessage {
        ChatMessage::System { content: text.to_string(), timestamp: IsoString::now() }
    }

    /// User message containing a single text block.
    fn user_msg(text: &'static str) -> ChatMessage {
        ChatMessage::User { content: vec![ContentBlock::text(text)], timestamp: IsoString::now() }
    }

    /// Assistant message with no tool calls.
    fn assistant_msg(text: &str, reasoning: AssistantReasoning) -> ChatMessage {
        ChatMessage::Assistant {
            content: text.to_string(),
            reasoning,
            timestamp: IsoString::now(),
            tool_calls: vec![],
        }
    }

    /// Successful tool result with `{}` as its arguments.
    fn tool_result_msg(id: &str, name: &str, result: &str) -> ChatMessage {
        ChatMessage::ToolCallResult(Ok(ToolCallResult {
            id: id.to_string(),
            name: name.to_string(),
            arguments: "{}".to_string(),
            result: result.to_string(),
        }))
    }

    /// Reasoning with summary text "think" and an encrypted blob produced by `source`.
    fn encrypted_reasoning(source: LlmModel) -> AssistantReasoning {
        AssistantReasoning {
            summary_text: Some("think".to_string()),
            encrypted_content: Some(crate::EncryptedReasoningContent {
                id: "r_test".to_string(),
                model: source,
                content: "blob".to_string(),
            }),
        }
    }

    /// Tool definition shared by the tool-related tests.
    fn read_file_tool() -> ToolDefinition {
        ToolDefinition {
            name: "read_file".to_string(),           // 9
            description: "Reads a file".to_string(), // 12
            parameters: "{}".to_string(),            // 2
            server: None,
        }
    }

    /// One system message followed by five non-system messages, no tools.
    fn create_test_context() -> Context {
        let messages = vec![
            system_msg("You are a helpful assistant."),
            user_msg("Hello"),
            assistant_msg("Hi there!", AssistantReasoning::default()),
            tool_result_msg("1", "tool1", "Result 1"),
            tool_result_msg("2", "tool2", "Result 2"),
            tool_result_msg("3", "tool3", "Result 3"),
        ];
        Context::new(messages, vec![])
    }

    #[test]
    fn replace_conversation_preserves_system_message() {
        let mut ctx = create_test_context();
        ctx.replace_conversation(vec![user_msg("new")]);

        assert_eq!(ctx.message_count(), 2);
        assert!(ctx.messages()[0].is_system());
        assert!(matches!(ctx.messages()[1], ChatMessage::User { .. }));
    }

    #[test]
    fn replace_conversation_replaces_old_non_system_messages() {
        let mut ctx = create_test_context();
        ctx.replace_conversation(vec![assistant_msg("replacement", AssistantReasoning::default())]);

        assert_eq!(ctx.message_count(), 2);
        assert!(
            ctx.messages()
                .iter()
                .all(|message| { !matches!(message, ChatMessage::User { .. } | ChatMessage::ToolCallResult(_)) })
        );
        assert!(matches!(ctx.messages()[1], ChatMessage::Assistant { ref content, .. } if content == "replacement"));
    }

    #[test]
    fn replace_conversation_filters_incoming_system_messages() {
        let mut ctx = create_test_context();
        ctx.replace_conversation(vec![system_msg("wrong system"), user_msg("kept")]);

        assert_eq!(ctx.message_count(), 2);
        assert!(
            matches!(ctx.messages()[0], ChatMessage::System { ref content, .. } if content == "You are a helpful assistant.")
        );
        assert!(matches!(ctx.messages()[1], ChatMessage::User { .. }));
    }

    #[test]
    fn replace_conversation_does_not_change_tools() {
        let tool = read_file_tool();
        let mut ctx = Context::new(vec![system_msg("system")], vec![tool.clone()]);
        ctx.replace_conversation(vec![user_msg("new")]);

        assert_eq!(ctx.tools(), &vec![tool]);
    }

    #[test]
    fn test_message_count() {
        let ctx = create_test_context();
        assert_eq!(ctx.message_count(), 6);
    }

    #[test]
    fn test_with_compacted_summary_preserves_system_prompt() {
        let ctx = create_test_context();
        let compacted = ctx.with_compacted_summary("This is a summary of previous conversation.");

        assert_eq!(compacted.message_count(), 2);
        assert!(compacted.messages()[0].is_system());
        assert!(compacted.messages()[1].is_summary());
    }

    #[test]
    fn test_with_compacted_summary_empty_context() {
        let ctx = Context::new(vec![system_msg("System")], vec![]);
        let compacted = ctx.with_compacted_summary("Summary");

        // Nothing to compact, so no summary message is added.
        assert_eq!(compacted.message_count(), 1);
    }

    #[test]
    fn test_messages_for_summary() {
        let ctx = create_test_context();
        let msgs = ctx.messages_for_summary();

        assert_eq!(msgs.len(), 5);
        assert!(msgs.iter().all(|m| !m.is_system()));
    }

    #[test]
    fn test_prompt_cache_key_default_is_none() {
        let ctx = create_test_context();
        assert_eq!(ctx.prompt_cache_key(), None);
    }

    #[test]
    fn test_prompt_cache_key_set_and_get() {
        let mut ctx = create_test_context();
        ctx.set_prompt_cache_key(Some("session-123".to_string()));
        assert_eq!(ctx.prompt_cache_key(), Some("session-123"));

        ctx.set_prompt_cache_key(None);
        assert_eq!(ctx.prompt_cache_key(), None);
    }

    #[test]
    fn test_prompt_cache_key_preserved_through_compaction() {
        let mut ctx = create_test_context();
        ctx.set_prompt_cache_key(Some("session-abc".to_string()));
        let compacted = ctx.with_compacted_summary("Summary");
        assert_eq!(compacted.prompt_cache_key(), Some("session-abc"));
    }

    #[test]
    fn test_prompt_cache_key_preserved_through_projection() {
        let mut ctx = Context::new(vec![user_msg("Hello")], vec![]);
        ctx.set_prompt_cache_key(Some("session-xyz".to_string()));
        let projected = ctx.filter_encrypted_reasoning(&model(OPUS));
        assert_eq!(projected.prompt_cache_key(), Some("session-xyz"));
    }

    #[test]
    fn test_reasoning_effort_default_is_none() {
        let ctx = create_test_context();
        assert_eq!(ctx.reasoning_effort(), None);
    }

    #[test]
    fn test_reasoning_effort_set_and_get() {
        let mut ctx = create_test_context();
        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::High));
        assert_eq!(ctx.reasoning_effort(), Some(crate::ReasoningEffort::High));

        ctx.set_reasoning_effort(None);
        assert_eq!(ctx.reasoning_effort(), None);
    }

    #[test]
    fn test_reasoning_effort_preserved_through_compaction() {
        let mut ctx = create_test_context();
        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::Medium));
        let compacted = ctx.with_compacted_summary("Summary");
        assert_eq!(compacted.reasoning_effort(), Some(crate::ReasoningEffort::Medium));
    }

    #[test]
    fn test_reasoning_effort_preserved_through_projection() {
        let mut ctx = Context::new(vec![user_msg("Hello")], vec![]);
        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::Medium));
        let projected = ctx.filter_encrypted_reasoning(&model(OPUS));
        assert_eq!(projected.reasoning_effort(), Some(crate::ReasoningEffort::Medium));
    }

    #[test]
    fn test_estimated_token_count() {
        // "You are a helpful assistant." = 28 bytes
        // "Hello" = 5 bytes
        // "Hi there!" = 9 bytes (assistant, no reasoning, no tool calls)
        // 3 tool results: "Result 1" (8) + "tool1" (5) + "{}" (2) = 15 each = 45 total
        // Total message bytes = 28 + 5 + 9 + 45 = 87
        let ctx = create_test_context();
        let base_estimate = ctx.estimated_token_count();

        // With no tools, estimate = message_bytes / 4
        assert_eq!(base_estimate, 87 / 4);

        // Now add a tool definition and verify it increases
        let ctx_with_tools = Context::new(ctx.messages().clone(), vec![read_file_tool()]);
        let with_tools_estimate = ctx_with_tools.estimated_token_count();
        assert_eq!(with_tools_estimate, (87 + 9 + 12 + 2) / 4);
        assert!(with_tools_estimate > base_estimate);
    }

    #[test]
    fn compaction_drops_encrypted_reasoning() {
        let ctx = Context::new(
            vec![user_msg("Hello"), assistant_msg("I see.", encrypted_reasoning(model(OPUS)))],
            vec![],
        );
        let compacted = ctx.with_compacted_summary("Summary of conversation");

        // Both non-system messages collapse into a single summary, so no assistant
        // message (and therefore no encrypted reasoning) can survive compaction.
        assert_eq!(compacted.message_count(), 1);
        assert!(matches!(compacted.messages()[0], ChatMessage::Summary { messages_compacted: 2, .. }));
        assert!(
            compacted.messages().iter().all(|msg| !matches!(msg, ChatMessage::Assistant { .. })),
            "compaction should drop encrypted reasoning"
        );
    }

    // The two tests below exercise `filter_encrypted_reasoning`.
    #[test]
    fn projected_for_keeps_matching_model() {
        let source = model(OPUS);
        let ctx = Context::new(vec![assistant_msg("reply", encrypted_reasoning(source.clone()))], vec![]);

        let projected = ctx.filter_encrypted_reasoning(&source);
        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
            assert!(reasoning.encrypted_content.is_some());
            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
        } else {
            panic!("expected assistant message");
        }
    }

    #[test]
    fn projected_for_strips_non_matching_model() {
        let ctx = Context::new(vec![assistant_msg("reply", encrypted_reasoning(model(OPUS)))], vec![]);

        let projected = ctx.filter_encrypted_reasoning(&model(SONNET));
        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
            assert!(reasoning.encrypted_content.is_none());
            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
        } else {
            panic!("expected assistant message");
        }
    }
}
// llm/context.rs