// agent_sdk/context/estimator.rs

1//! Token estimation for context size calculation.
2
3use crate::llm::{Content, ContentBlock, Message};
4
/// Estimates token count for messages.
///
/// Uses a simple heuristic of ~4 characters per token, which provides
/// a reasonable approximation for most English text and code.
///
/// Counting is based on UTF-8 byte length, so text containing multibyte
/// characters will be over-estimated slightly — acceptable for a
/// conservative budget heuristic.
///
/// For more accurate counting, consider using a tokenizer library
/// specific to your model (e.g., tiktoken for `OpenAI` models).
pub struct TokenEstimator;
13
14impl TokenEstimator {
15    /// Characters per token estimate.
16    /// This is a conservative estimate; actual ratio varies by content.
17    const CHARS_PER_TOKEN: usize = 4;
18
19    /// Overhead tokens per message (role, formatting).
20    const MESSAGE_OVERHEAD: usize = 4;
21
22    /// Overhead for tool use blocks (id, name, formatting).
23    const TOOL_USE_OVERHEAD: usize = 20;
24
25    /// Overhead for tool result blocks (id, formatting).
26    const TOOL_RESULT_OVERHEAD: usize = 10;
27
28    /// Minimum token estimate for redacted thinking blocks.
29    ///
30    /// Even small redacted thinking blocks carry significant API token cost
31    /// because they contain encrypted reasoning that the model must process.
32    const REDACTED_THINKING_MIN_TOKENS: usize = 512;
33
34    /// Estimate tokens for a text string.
35    #[must_use]
36    pub const fn estimate_text(text: &str) -> usize {
37        // Simple estimation: ~4 chars per token
38        text.len().div_ceil(Self::CHARS_PER_TOKEN)
39    }
40
41    /// Estimate tokens for a single message.
42    #[must_use]
43    pub fn estimate_message(message: &Message) -> usize {
44        let content_tokens = match &message.content {
45            Content::Text(text) => Self::estimate_text(text),
46            Content::Blocks(blocks) => blocks.iter().map(Self::estimate_block).sum(),
47        };
48
49        content_tokens + Self::MESSAGE_OVERHEAD
50    }
51
52    /// Estimate tokens for a content block.
53    #[must_use]
54    pub fn estimate_block(block: &ContentBlock) -> usize {
55        match block {
56            ContentBlock::Text { text } => Self::estimate_text(text),
57            ContentBlock::Thinking { thinking, .. } => Self::estimate_text(thinking),
58            ContentBlock::RedactedThinking { data } => {
59                // The data field is a base64-encoded encrypted blob whose size
60                // correlates with the original thinking content.  Base64 encodes
61                // 3 bytes into 4 chars, so `data.len() * 3 / 4` approximates
62                // the raw byte count.  Using the same chars-per-token heuristic
63                // on the raw bytes gives a reasonable lower bound.
64                //
65                // A floor of REDACTED_THINKING_MIN_TOKENS prevents tiny blocks
66                // from being under-counted — the API charges substantial token
67                // overhead for every redacted thinking block regardless of size.
68                let raw_bytes = data.len() * 3 / 4;
69                let estimated = raw_bytes.div_ceil(Self::CHARS_PER_TOKEN);
70                estimated.max(Self::REDACTED_THINKING_MIN_TOKENS)
71            }
72            ContentBlock::ToolUse { name, input, .. } => {
73                let input_str = serde_json::to_string(input).unwrap_or_default();
74                Self::estimate_text(name)
75                    + Self::estimate_text(&input_str)
76                    + Self::TOOL_USE_OVERHEAD
77            }
78            ContentBlock::ToolResult { content, .. } => {
79                Self::estimate_text(content) + Self::TOOL_RESULT_OVERHEAD
80            }
81            ContentBlock::Image { source } | ContentBlock::Document { source } => {
82                // Rough estimate: base64 data is ~4/3 of original, 1 token per 4 chars
83                source.data.len() / 4 + Self::MESSAGE_OVERHEAD
84            }
85        }
86    }
87
88    /// Estimate total tokens for a message history.
89    #[must_use]
90    pub fn estimate_history(messages: &[Message]) -> usize {
91        messages.iter().map(Self::estimate_message).sum()
92    }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::Role;
    use serde_json::json;

    #[test]
    fn test_estimate_text() {
        // Empty text costs nothing.
        assert_eq!(TokenEstimator::estimate_text(""), 0);

        // Short text (less than 4 chars) rounds up to one token.
        assert_eq!(TokenEstimator::estimate_text("hi"), 1);

        // Exactly 4 chars
        assert_eq!(TokenEstimator::estimate_text("test"), 1);

        // 5 chars should be 2 tokens
        assert_eq!(TokenEstimator::estimate_text("hello"), 2);

        // Longer text
        assert_eq!(TokenEstimator::estimate_text("hello world!"), 3); // 12 chars / 4 = 3
    }

    #[test]
    fn test_estimate_text_message() {
        let message = Message {
            role: Role::User,
            content: Content::Text("Hello, how are you?".to_string()), // 19 chars = 5 tokens
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 5 content tokens + 4 overhead = 9
        assert_eq!(estimate, 9);
    }

    #[test]
    fn test_estimate_blocks_message() {
        let message = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![
                ContentBlock::Text {
                    text: "Let me help.".to_string(), // 12 chars = 3 tokens
                },
                ContentBlock::ToolUse {
                    id: "tool_123".to_string(),
                    name: "read".to_string(), // 4 chars = 1 token
                    // Serializes to `{"path":"/test.txt"}` = 20 chars = 5 tokens
                    input: json!({"path": "/test.txt"}),
                    thought_signature: None,
                },
            ]),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // Text: 3 tokens
        // ToolUse: 1 (name) + 5 (input) + 20 (overhead) = 26 tokens
        // Message overhead: 4
        // Total: 3 + 26 + 4 = 33
        assert_eq!(estimate, 33);
    }

    #[test]
    fn test_estimate_tool_result() {
        let message = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "tool_123".to_string(),
                content: "File contents here...".to_string(), // 21 chars = 6 tokens
                is_error: None,
            }]),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 6 content + 10 overhead + 4 message overhead = 20
        assert_eq!(estimate, 20);
    }

    #[test]
    fn test_estimate_history() {
        let messages = vec![
            Message::user("Hello"),          // 5 chars = 2 tokens + 4 overhead = 6
            Message::assistant("Hi there!"), // 9 chars = 3 tokens + 4 overhead = 7
            Message::user("How are you?"),   // 12 chars = 3 tokens + 4 overhead = 7
        ];

        let estimate = TokenEstimator::estimate_history(&messages);
        assert_eq!(estimate, 20);
    }

    #[test]
    fn test_empty_history() {
        let messages: Vec<Message> = vec![];
        assert_eq!(TokenEstimator::estimate_history(&messages), 0);
    }

    #[test]
    fn test_estimate_redacted_thinking_uses_data_length() {
        // Simulate a realistic redacted thinking blob (~8KB base64 data).
        // 8192 base64 chars → ~6144 raw bytes → 6144/4 = 1536 estimated tokens.
        let data = "A".repeat(8192);
        let block = ContentBlock::RedactedThinking { data };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, 1536);
    }

    #[test]
    fn test_estimate_redacted_thinking_respects_minimum() {
        // Tiny data blob: 100 base64 chars → ~75 raw bytes → 75/4 = 19 tokens.
        // Should be clamped to the minimum (512).
        let data = "A".repeat(100);
        let block = ContentBlock::RedactedThinking { data };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, TokenEstimator::REDACTED_THINKING_MIN_TOKENS);
    }

    #[test]
    fn test_estimate_redacted_thinking_empty_data() {
        // Empty data should return the minimum floor.
        let block = ContentBlock::RedactedThinking {
            data: String::new(),
        };

        let estimate = TokenEstimator::estimate_block(&block);
        assert_eq!(estimate, TokenEstimator::REDACTED_THINKING_MIN_TOKENS);
    }

    #[test]
    fn test_redacted_thinking_accumulates_in_history() {
        // 5 redacted thinking blocks at ~2000 tokens each should produce a
        // meaningful total that triggers compaction.
        let blocks: Vec<ContentBlock> = (0..5)
            .map(|_| ContentBlock::RedactedThinking {
                data: "B".repeat(10_000), // 10k base64 → 7500 raw → 1875 tokens
            })
            .collect();
        let message = Message {
            role: Role::Assistant,
            content: Content::Blocks(blocks),
        };

        let estimate = TokenEstimator::estimate_message(&message);
        // 5 × 1875 + 4 message overhead = 9379
        assert_eq!(estimate, 9379);
    }
}