Skip to main content

agent_sdk/context/
estimator.rs

1//! Token estimation for context size calculation.
2
3use crate::llm::{Content, ContentBlock, Message};
4
5/// Estimates token count for messages.
6///
7/// Uses a simple heuristic of ~4 characters per token, which provides
8/// a reasonable approximation for most English text and code.
9///
10/// For more accurate counting, consider using a tokenizer library
11/// specific to your model (e.g., tiktoken for `OpenAI` models).
12pub struct TokenEstimator;
13
14impl TokenEstimator {
15    /// Characters per token estimate.
16    /// This is a conservative estimate; actual ratio varies by content.
17    const CHARS_PER_TOKEN: usize = 4;
18
19    /// Overhead tokens per message (role, formatting).
20    const MESSAGE_OVERHEAD: usize = 4;
21
22    /// Overhead for tool use blocks (id, name, formatting).
23    const TOOL_USE_OVERHEAD: usize = 20;
24
25    /// Overhead for tool result blocks (id, formatting).
26    const TOOL_RESULT_OVERHEAD: usize = 10;
27
28    /// Minimum token estimate for redacted thinking blocks.
29    ///
30    /// Even small redacted thinking blocks carry significant API token cost
31    /// because they contain encrypted reasoning that the model must process.
32    const REDACTED_THINKING_MIN_TOKENS: usize = 512;
33
34    /// Estimate tokens for a text string.
35    #[must_use]
36    pub const fn estimate_text(text: &str) -> usize {
37        // Simple estimation: ~4 chars per token
38        text.len().div_ceil(Self::CHARS_PER_TOKEN)
39    }
40
41    /// Estimate tokens for a single message.
42    #[must_use]
43    pub fn estimate_message(message: &Message) -> usize {
44        let content_tokens = match &message.content {
45            Content::Text(text) => Self::estimate_text(text),
46            Content::Blocks(blocks) => blocks.iter().map(Self::estimate_block).sum(),
47        };
48
49        content_tokens + Self::MESSAGE_OVERHEAD
50    }
51
52    /// Estimate tokens for a content block.
53    #[must_use]
54    pub fn estimate_block(block: &ContentBlock) -> usize {
55        match block {
56            ContentBlock::Text { text } => Self::estimate_text(text),
57            ContentBlock::Thinking { thinking, .. } => Self::estimate_text(thinking),
58            ContentBlock::RedactedThinking { data } => {
59                // The data field is a base64-encoded encrypted blob whose size
60                // correlates with the original thinking content.  Base64 encodes
61                // 3 bytes into 4 chars, so `data.len() * 3 / 4` approximates
62                // the raw byte count.  Using the same chars-per-token heuristic
63                // on the raw bytes gives a reasonable lower bound.
64                //
65                // A floor of REDACTED_THINKING_MIN_TOKENS prevents tiny blocks
66                // from being under-counted — the API charges substantial token
67                // overhead for every redacted thinking block regardless of size.
68                let raw_bytes = data.len() * 3 / 4;
69                let estimated = raw_bytes.div_ceil(Self::CHARS_PER_TOKEN);
70                estimated.max(Self::REDACTED_THINKING_MIN_TOKENS)
71            }
72            ContentBlock::ToolUse { name, input, .. } => {
73                let input_str = serde_json::to_string(input).unwrap_or_default();
74                Self::estimate_text(name)
75                    + Self::estimate_text(&input_str)
76                    + Self::TOOL_USE_OVERHEAD
77            }
78            ContentBlock::ToolResult { content, .. } => {
79                Self::estimate_text(content) + Self::TOOL_RESULT_OVERHEAD
80            }
81            ContentBlock::Image { source } | ContentBlock::Document { source } => {
82                // Rough estimate: base64 data is ~4/3 of original, 1 token per 4 chars
83                source.data.len() / 4 + Self::MESSAGE_OVERHEAD
84            }
85            // `ContentBlock` is `#[non_exhaustive]`; charge an unknown future
86            // block kind the per-message overhead as a conservative floor.
87            _ => Self::MESSAGE_OVERHEAD,
88        }
89    }
90
91    /// Estimate total tokens for a message history.
92    #[must_use]
93    pub fn estimate_history(messages: &[Message]) -> usize {
94        messages.iter().map(Self::estimate_message).sum()
95    }
96}
97
#[cfg(test)]
mod tests {
    //! Unit tests pinning the exact values produced by the estimation
    //! heuristic, so accidental changes to ratios or overheads are caught.

    use super::*;
    use crate::llm::Role;
    use serde_json::json;

    #[test]
    fn test_estimate_text() {
        // Empty text
        assert_eq!(TokenEstimator::estimate_text(""), 0);

        // Short text (less than 4 chars) rounds up to one token
        assert_eq!(TokenEstimator::estimate_text("hi"), 1);

        // Exactly 4 chars
        assert_eq!(TokenEstimator::estimate_text("test"), 1);

        // 5 chars should be 2 tokens
        assert_eq!(TokenEstimator::estimate_text("hello"), 2);

        // Longer text
        assert_eq!(TokenEstimator::estimate_text("hello world!"), 3); // 12 chars / 4 = 3
    }

    #[test]
    fn test_estimate_text_message() {
        let message = Message {
            role: Role::User,
            content: Content::Text("Hello, how are you?".to_string()), // 19 chars = 5 tokens
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 5 content tokens + 4 overhead = 9
        assert_eq!(estimate, 9);
    }

    #[test]
    fn test_estimate_blocks_message() {
        let message = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![
                ContentBlock::Text {
                    text: "Let me help.".to_string(), // 12 chars = 3 tokens
                },
                ContentBlock::ToolUse {
                    id: "tool_123".to_string(),
                    name: "read".to_string(), // 4 chars = 1 token
                    // Serializes compactly as `{"path":"/test.txt"}`:
                    // 20 chars = 5 tokens
                    input: json!({"path": "/test.txt"}),
                    thought_signature: None,
                },
            ]),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // Text: 3 tokens
        // ToolUse: 1 (name) + 5 (input) + 20 (overhead) = 26 tokens
        // Message overhead: 4
        // Total: 3 + 26 + 4 = 33
        //
        // Assert the exact total: a loose lower bound such as `> 25` would
        // still pass if the tool name and input were ignored entirely
        // (3 + 20 + 4 = 27).
        assert_eq!(estimate, 33);
    }

    #[test]
    fn test_estimate_tool_result() {
        let message = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "tool_123".to_string(),
                content: "File contents here...".to_string(), // 21 chars = 6 tokens
                is_error: None,
            }]),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 6 content + 10 overhead + 4 message overhead = 20
        assert_eq!(estimate, 20);
    }

    #[test]
    fn test_estimate_history() {
        let messages = vec![
            Message::user("Hello"),          // 5 chars = 2 tokens + 4 overhead = 6
            Message::assistant("Hi there!"), // 9 chars = 3 tokens + 4 overhead = 7
            Message::user("How are you?"),   // 12 chars = 3 tokens + 4 overhead = 7
        ];

        let estimate = TokenEstimator::estimate_history(&messages);
        assert_eq!(estimate, 20);
    }

    #[test]
    fn test_empty_history() {
        let messages: Vec<Message> = vec![];
        assert_eq!(TokenEstimator::estimate_history(&messages), 0);
    }

    #[test]
    fn test_estimate_redacted_thinking_uses_data_length() {
        // Simulate a realistic redacted thinking blob (~8KB base64 data).
        // 8192 base64 chars → ~6144 raw bytes → 6144/4 = 1536 estimated tokens.
        let data = "A".repeat(8192);
        let block = ContentBlock::RedactedThinking { data };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, 1536);
    }

    #[test]
    fn test_estimate_redacted_thinking_respects_minimum() {
        // Tiny data blob: 100 base64 chars → ~75 raw bytes → ceil(75/4) = 19 tokens.
        // Should be clamped to the minimum (512).
        let data = "A".repeat(100);
        let block = ContentBlock::RedactedThinking { data };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, TokenEstimator::REDACTED_THINKING_MIN_TOKENS);
    }

    #[test]
    fn test_estimate_redacted_thinking_empty_data() {
        // Empty data should return the minimum floor.
        let block = ContentBlock::RedactedThinking {
            data: String::new(),
        };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, TokenEstimator::REDACTED_THINKING_MIN_TOKENS);
    }

    #[test]
    fn test_redacted_thinking_accumulates_in_history() {
        // 5 redacted thinking blocks at 1875 tokens each must add up in the
        // message total instead of each being flattened to a small constant.
        let blocks: Vec<ContentBlock> = (0..5)
            .map(|_| ContentBlock::RedactedThinking {
                data: "B".repeat(10_000), // 10k base64 → 7500 raw → 1875 tokens
            })
            .collect();
        let message = Message {
            role: Role::Assistant,
            content: Content::Blocks(blocks),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 5 × 1875 + 4 message overhead = 9379
        assert_eq!(estimate, 9379);
    }
}