// llm/context.rs

1use serde::{Deserialize, Serialize};
2
3use crate::catalog::LlmModel;
4use crate::chat_message::AssistantReasoning;
5use crate::reasoning::ReasoningEffort;
6use crate::types::IsoString;
7
8use super::{ChatMessage, ToolDefinition};
9
#[doc = include_str!("docs/context.md")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Context {
    // Ordered conversation history: system, user, assistant, and tool-result turns.
    messages: Vec<ChatMessage>,
    // Tool definitions available in this context.
    tools: Vec<ToolDefinition>,
    // Per-context reasoning-effort override; transient, so excluded from serialization.
    #[serde(skip)]
    reasoning_effort: Option<ReasoningEffort>,
    // Provider prompt-cache key; transient, so excluded from serialization.
    #[serde(skip)]
    prompt_cache_key: Option<String>,
}
20
21impl Context {
22    pub fn new(messages: Vec<ChatMessage>, tools: Vec<ToolDefinition>) -> Self {
23        Self { messages, tools, reasoning_effort: None, prompt_cache_key: None }
24    }
25
26    pub fn prompt_cache_key(&self) -> Option<&str> {
27        self.prompt_cache_key.as_deref()
28    }
29
30    pub fn set_prompt_cache_key(&mut self, key: Option<String>) {
31        self.prompt_cache_key = key;
32    }
33
34    pub fn reasoning_effort(&self) -> Option<ReasoningEffort> {
35        self.reasoning_effort
36    }
37
38    pub fn set_reasoning_effort(&mut self, effort: Option<ReasoningEffort>) {
39        self.reasoning_effort = effort;
40    }
41
42    pub fn add_message(&mut self, message: ChatMessage) {
43        self.messages.push(message);
44    }
45
46    pub fn set_tools(&mut self, tools: Vec<ToolDefinition>) {
47        self.tools = tools;
48    }
49
50    pub fn messages(&self) -> &Vec<ChatMessage> {
51        &self.messages
52    }
53
54    pub fn tools(&self) -> &Vec<ToolDefinition> {
55        &self.tools
56    }
57
58    /// Returns the number of messages in the context
59    pub fn message_count(&self) -> usize {
60        self.messages.len()
61    }
62
63    /// Estimate total token count using the ~4 bytes/token heuristic.
64    /// Includes messages and tool definitions. Used for pre-flight overflow detection.
65    pub fn estimated_token_count(&self) -> u32 {
66        let message_bytes: usize = self.messages.iter().map(ChatMessage::estimated_bytes).sum();
67        let tool_bytes: usize =
68            self.tools.iter().map(|t| t.name.len() + t.description.len() + t.parameters.len()).sum();
69        let total_bytes = message_bytes + tool_bytes;
70        u32::try_from(total_bytes / 4).unwrap_or(u32::MAX)
71    }
72
73    /// Build an assistant turn and its tool call results and append them to messages.
74    pub fn push_assistant_turn(
75        &mut self,
76        content: &str,
77        reasoning: AssistantReasoning,
78        completed_tools: Vec<Result<super::ToolCallResult, super::ToolCallError>>,
79    ) {
80        let tool_requests: Vec<_> = completed_tools
81            .iter()
82            .map(|result| match result {
83                Ok(r) => {
84                    super::ToolCallRequest { id: r.id.clone(), name: r.name.clone(), arguments: r.arguments.clone() }
85                }
86                Err(e) => super::ToolCallRequest {
87                    id: e.id.clone(),
88                    name: e.name.clone(),
89                    arguments: e.arguments.clone().unwrap_or_default(),
90                },
91            })
92            .collect();
93
94        self.messages.push(ChatMessage::Assistant {
95            content: content.to_string(),
96            reasoning,
97            timestamp: IsoString::now(),
98            tool_calls: tool_requests,
99        });
100
101        for result in completed_tools {
102            self.messages.push(ChatMessage::ToolCallResult(result));
103        }
104    }
105
106    /// Return a copy with encrypted reasoning filtered for the given model.
107    /// Encrypted content is kept only when its source model matches.
108    pub fn filter_encrypted_reasoning(&self, model: &LlmModel) -> Self {
109        let messages = self
110            .messages
111            .iter()
112            .map(|msg| match msg {
113                ChatMessage::Assistant { content, reasoning, timestamp, tool_calls } => ChatMessage::Assistant {
114                    content: content.clone(),
115                    reasoning: AssistantReasoning {
116                        summary_text: reasoning.summary_text.clone(),
117                        encrypted_content: reasoning
118                            .encrypted_content
119                            .as_ref()
120                            .filter(|ec| &ec.model == model)
121                            .cloned(),
122                    },
123                    timestamp: timestamp.clone(),
124                    tool_calls: tool_calls.clone(),
125                },
126                other => other.clone(),
127            })
128            .collect();
129        Context {
130            messages,
131            tools: self.tools.clone(),
132            reasoning_effort: self.reasoning_effort,
133            prompt_cache_key: self.prompt_cache_key.clone(),
134        }
135    }
136
137    /// Clear all non-system messages, retaining only system prompts.
138    pub fn clear_conversation(&mut self) {
139        self.messages.retain(super::chat_message::ChatMessage::is_system);
140    }
141
142    /// Get all non-system messages for summarization
143    pub fn messages_for_summary(&self) -> Vec<&ChatMessage> {
144        self.messages.iter().filter(|msg| !msg.is_system()).collect()
145    }
146
147    /// Create a new context with all messages replaced by a summary.
148    /// Preserves the system prompt and tools.
149    pub fn with_compacted_summary(&self, summary: &str) -> Context {
150        let system_messages: Vec<_> = self.messages.iter().filter(|msg| msg.is_system()).cloned().collect();
151
152        let non_system_count = self.messages.len() - system_messages.len();
153
154        let mut messages = system_messages;
155        if non_system_count > 0 {
156            messages.push(ChatMessage::Summary {
157                content: summary.to_string(),
158                timestamp: IsoString::now(),
159                messages_compacted: non_system_count,
160            });
161        }
162
163        Context {
164            messages,
165            tools: self.tools.clone(),
166            reasoning_effort: self.reasoning_effort,
167            prompt_cache_key: self.prompt_cache_key.clone(),
168        }
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ContentBlock;
    use crate::ToolCallResult;
    use crate::catalog::LlmModel;

    // Fixture: one system prompt, one user turn, one assistant turn,
    // and three successful tool-call results (6 messages total, no tools).
    fn create_test_context() -> Context {
        let messages = vec![
            ChatMessage::System { content: "You are a helpful assistant.".to_string(), timestamp: IsoString::now() },
            ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() },
            ChatMessage::Assistant {
                content: "Hi there!".to_string(),
                reasoning: AssistantReasoning::default(),
                timestamp: IsoString::now(),
                tool_calls: vec![],
            },
            ChatMessage::ToolCallResult(Ok(ToolCallResult {
                id: "1".to_string(),
                name: "tool1".to_string(),
                arguments: "{}".to_string(),
                result: "Result 1".to_string(),
            })),
            ChatMessage::ToolCallResult(Ok(ToolCallResult {
                id: "2".to_string(),
                name: "tool2".to_string(),
                arguments: "{}".to_string(),
                result: "Result 2".to_string(),
            })),
            ChatMessage::ToolCallResult(Ok(ToolCallResult {
                id: "3".to_string(),
                name: "tool3".to_string(),
                arguments: "{}".to_string(),
                result: "Result 3".to_string(),
            })),
        ];
        Context::new(messages, vec![])
    }

    #[test]
    fn test_message_count() {
        let ctx = create_test_context();
        assert_eq!(ctx.message_count(), 6);
    }

    // Compaction should collapse the 5 non-system messages into one Summary,
    // keeping the system prompt first.
    #[test]
    fn test_with_compacted_summary_preserves_system_prompt() {
        let ctx = create_test_context();
        let compacted = ctx.with_compacted_summary("This is a summary of previous conversation.");

        assert_eq!(compacted.message_count(), 2);
        assert!(compacted.messages()[0].is_system());
        assert!(compacted.messages()[1].is_summary());
    }

    // With nothing to compact, no Summary message should be added.
    #[test]
    fn test_with_compacted_summary_empty_context() {
        let ctx = Context::new(
            vec![ChatMessage::System { content: "System".to_string(), timestamp: IsoString::now() }],
            vec![],
        );
        let compacted = ctx.with_compacted_summary("Summary");

        assert_eq!(compacted.message_count(), 1);
    }

    #[test]
    fn test_messages_for_summary() {
        let ctx = create_test_context();
        let msgs = ctx.messages_for_summary();

        // All 5 non-system messages, system prompt excluded.
        assert_eq!(msgs.len(), 5);
        assert!(msgs.iter().all(|m| !m.is_system()));
    }

    #[test]
    fn test_prompt_cache_key_default_is_none() {
        let ctx = create_test_context();
        assert_eq!(ctx.prompt_cache_key(), None);
    }

    #[test]
    fn test_prompt_cache_key_set_and_get() {
        let mut ctx = create_test_context();
        ctx.set_prompt_cache_key(Some("session-123".to_string()));
        assert_eq!(ctx.prompt_cache_key(), Some("session-123"));

        ctx.set_prompt_cache_key(None);
        assert_eq!(ctx.prompt_cache_key(), None);
    }

    // The cache key must survive with_compacted_summary (which builds a new Context).
    #[test]
    fn test_prompt_cache_key_preserved_through_compaction() {
        let mut ctx = create_test_context();
        ctx.set_prompt_cache_key(Some("session-abc".to_string()));
        let compacted = ctx.with_compacted_summary("Summary");
        assert_eq!(compacted.prompt_cache_key(), Some("session-abc"));
    }

    // The cache key must also survive filter_encrypted_reasoning.
    #[test]
    fn test_prompt_cache_key_preserved_through_projection() {
        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
        let mut ctx = Context::new(
            vec![ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() }],
            vec![],
        );
        ctx.set_prompt_cache_key(Some("session-xyz".to_string()));
        let projected = ctx.filter_encrypted_reasoning(&model);
        assert_eq!(projected.prompt_cache_key(), Some("session-xyz"));
    }

    #[test]
    fn test_reasoning_effort_default_is_none() {
        let ctx = create_test_context();
        assert_eq!(ctx.reasoning_effort(), None);
    }

    #[test]
    fn test_reasoning_effort_set_and_get() {
        let mut ctx = create_test_context();
        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::High));
        assert_eq!(ctx.reasoning_effort(), Some(crate::ReasoningEffort::High));

        ctx.set_reasoning_effort(None);
        assert_eq!(ctx.reasoning_effort(), None);
    }

    #[test]
    fn test_reasoning_effort_preserved_through_compaction() {
        let mut ctx = create_test_context();
        ctx.set_reasoning_effort(Some(crate::ReasoningEffort::Medium));
        let compacted = ctx.with_compacted_summary("Summary");
        assert_eq!(compacted.reasoning_effort(), Some(crate::ReasoningEffort::Medium));
    }

    // NOTE(review): the byte breakdown below assumes ChatMessage::estimated_bytes
    // counts content + tool name + arguments + result only — confirm against
    // chat_message.rs if this test drifts.
    #[test]
    fn test_estimated_token_count() {
        use crate::ToolDefinition;

        // "You are a helpful assistant." = 28 bytes
        // "Hello" = 5 bytes
        // "Hi there!" = 9 bytes (assistant, no reasoning, no tool calls)
        // 3 tool results: "Result 1" (8) + "tool1" (5) + "{}" (2) = 15 each = 45 total
        // Total message bytes = 28 + 5 + 9 + 45 = 87
        let ctx = create_test_context();
        let base_estimate = ctx.estimated_token_count();

        // With no tools, estimate = message_bytes / 4
        assert_eq!(base_estimate, 87 / 4);

        // Now add a tool definition and verify it increases
        let tool = ToolDefinition {
            name: "read_file".to_string(),           // 9
            description: "Reads a file".to_string(), // 12
            parameters: "{}".to_string(),            // 2
            server: None,
        };
        let ctx_with_tools = Context::new(ctx.messages().clone(), vec![tool]);
        let with_tools_estimate = ctx_with_tools.estimated_token_count();
        assert_eq!(with_tools_estimate, (87 + 9 + 12 + 2) / 4);
        assert!(with_tools_estimate > base_estimate);
    }

    // Compaction removes assistant messages entirely, so any encrypted
    // reasoning they carried must be gone from the result.
    #[test]
    fn compaction_drops_encrypted_reasoning() {
        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
        let ctx = Context::new(
            vec![
                ChatMessage::User { content: vec![ContentBlock::text("Hello")], timestamp: IsoString::now() },
                ChatMessage::Assistant {
                    content: "I see.".to_string(),
                    reasoning: AssistantReasoning {
                        summary_text: Some("thinking".to_string()),
                        encrypted_content: Some(crate::EncryptedReasoningContent {
                            id: "r_test".to_string(),
                            model,
                            content: "blob".to_string(),
                        }),
                    },
                    timestamp: IsoString::now(),
                    tool_calls: vec![],
                },
            ],
            vec![],
        );
        let compacted = ctx.with_compacted_summary("Summary of conversation");

        for msg in compacted.messages() {
            if let ChatMessage::Assistant { reasoning, .. } = msg {
                assert!(reasoning.encrypted_content.is_none(), "compaction should drop encrypted reasoning");
            }
        }
    }

    // Projection keeps encrypted content whose source model matches the target.
    #[test]
    fn projected_for_keeps_matching_model() {
        let model: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
        let ctx = Context::new(
            vec![ChatMessage::Assistant {
                content: "reply".to_string(),
                reasoning: AssistantReasoning {
                    summary_text: Some("think".to_string()),
                    encrypted_content: Some(crate::EncryptedReasoningContent {
                        id: "r_test".to_string(),
                        model: model.clone(),
                        content: "blob".to_string(),
                    }),
                },
                timestamp: IsoString::now(),
                tool_calls: vec![],
            }],
            vec![],
        );
        let projected = ctx.filter_encrypted_reasoning(&model);
        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
            assert!(reasoning.encrypted_content.is_some());
            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
        } else {
            panic!("expected assistant message");
        }
    }

    // Projection strips encrypted content from a different model but keeps
    // the plain-text reasoning summary.
    #[test]
    fn projected_for_strips_non_matching_model() {
        let model_a: LlmModel = "anthropic:claude-opus-4-6".parse().unwrap();
        let model_b: LlmModel = "anthropic:claude-sonnet-4-5-20250929".parse().unwrap();
        let ctx = Context::new(
            vec![ChatMessage::Assistant {
                content: "reply".to_string(),
                reasoning: AssistantReasoning {
                    summary_text: Some("think".to_string()),
                    encrypted_content: Some(crate::EncryptedReasoningContent {
                        id: "r_test".to_string(),
                        model: model_a,
                        content: "blob".to_string(),
                    }),
                },
                timestamp: IsoString::now(),
                tool_calls: vec![],
            }],
            vec![],
        );
        let projected = ctx.filter_encrypted_reasoning(&model_b);
        if let ChatMessage::Assistant { reasoning, .. } = &projected.messages()[0] {
            assert!(reasoning.encrypted_content.is_none());
            assert_eq!(reasoning.summary_text.as_deref(), Some("think"));
        } else {
            panic!("expected assistant message");
        }
    }
}