1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
//! Lightweight token estimation for context budget management.
use crate::llm::types::{ContentBlock, Message};
/// Estimate the token count of `text` using a 4-bytes-per-token heuristic.
///
/// This is a fast, dependency-free approximation; no external tokenizer is
/// needed. The length measured is the UTF-8 byte length (`str::len`), not the
/// number of Unicode scalar values, so non-ASCII text counts more than one
/// unit per character.
///
/// The quotient is rounded up, so any non-empty input yields at least one
/// token. The result saturates at `u32::MAX` rather than wrapping when the
/// input is too large to represent.
pub fn estimate_tokens(text: &str) -> u32 {
    // 4 bytes per token is a reasonable average for English + code.
    const BYTES_PER_TOKEN: usize = 4;

    // Divide while still in `usize`: narrowing the raw length first would
    // silently drop the high bits of very large inputs.
    let tokens = text.len().div_ceil(BYTES_PER_TOKEN);

    // The cast is lossless because the value has just been clamped.
    tokens.min(u32::MAX as usize) as u32
}
/// Estimate the token count of a single message, including all content blocks.
///
/// Each block contributes according to its variant:
///
/// * `Text` — the estimate for its text.
/// * `ToolUse` — the estimates for the id, the name, and the input rendered
///   via `to_string()`.
/// * `ToolResult` — the estimates for the tool-use id and the content.
/// * `Image` / `Audio` — one token per 750 characters of `data` (rounded
///   down) plus a fixed per-block overhead (85 for images, 50 for audio).
///
/// A small fixed overhead is added per message for role/structure tokens.
/// All additions saturate at `u32::MAX`, so an oversized message yields the
/// maximum estimate instead of overflowing.
pub fn estimate_message_tokens(message: &Message) -> u32 {
    // Role and separator tokens charged once per message.
    const MESSAGE_OVERHEAD: u32 = 4;
    // Base64 payloads are approximated as one token per 750 characters.
    // (Anthropic vision bills by image dimensions; this is only a context
    // window approximation.)
    const BASE64_CHARS_PER_TOKEN: usize = 750;
    // Fixed allowance for the image block structure.
    const IMAGE_BLOCK_OVERHEAD: u32 = 85;
    // Fixed allowance for the audio block structure.
    const AUDIO_BLOCK_OVERHEAD: u32 = 50;

    // Divide in `usize` before narrowing so large payload lengths are not
    // truncated; the cast is lossless after the clamp.
    let payload_tokens =
        |len: usize| -> u32 { (len / BASE64_CHARS_PER_TOKEN).min(u32::MAX as usize) as u32 };

    message
        .content
        .iter()
        .map(|block| match block {
            ContentBlock::Text { text } => estimate_tokens(text),
            ContentBlock::ToolUse { id, name, input } => estimate_tokens(id)
                .saturating_add(estimate_tokens(name))
                .saturating_add(estimate_tokens(&input.to_string())),
            ContentBlock::ToolResult {
                tool_use_id,
                content,
                ..
            } => estimate_tokens(tool_use_id).saturating_add(estimate_tokens(content)),
            ContentBlock::Image { data, .. } => {
                payload_tokens(data.len()).saturating_add(IMAGE_BLOCK_OVERHEAD)
            }
            ContentBlock::Audio { data, .. } => {
                payload_tokens(data.len()).saturating_add(AUDIO_BLOCK_OVERHEAD)
            }
        })
        // Start from the per-message overhead and accumulate without overflow.
        .fold(MESSAGE_OVERHEAD, u32::saturating_add)
}
#[cfg(test)]
mod tests {
    //! Unit tests pinning the arithmetic of the token-estimation heuristics.

    use super::*;
    use crate::llm::types::{Message, Role};
    use serde_json::json;

    #[test]
    fn estimate_tokens_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn estimate_tokens_short() {
        // "hello" = 5 bytes → ceil(5/4) = 2 tokens
        assert_eq!(estimate_tokens("hello"), 2);
    }

    #[test]
    fn estimate_tokens_exact_multiple() {
        // 8 bytes → ceil(8/4) = 2 tokens
        assert_eq!(estimate_tokens("abcdefgh"), 2);
    }

    #[test]
    fn estimate_tokens_longer_text() {
        // 100 bytes → ceil(100/4) = 25 tokens
        let text = "a".repeat(100);
        assert_eq!(estimate_tokens(&text), 25);
    }

    #[test]
    fn estimate_tokens_counts_utf8_bytes() {
        // Five two-byte characters = 10 bytes → ceil(10/4) = 3 tokens
        // (a per-character count would give 2).
        assert_eq!(estimate_tokens("ééééé"), 3);
    }

    #[test]
    fn estimate_message_tokens_empty_content() {
        let msg = Message {
            role: Role::User,
            content: vec![],
        };
        // Only the per-message overhead remains.
        assert_eq!(estimate_message_tokens(&msg), 4);
    }

    #[test]
    fn estimate_message_tokens_text_block() {
        // 11 bytes → ceil(11/4) = 3, plus 4 overhead = 7
        let msg = Message::user("hello world");
        let tokens = estimate_message_tokens(&msg);
        assert_eq!(tokens, 4 + 3); // overhead + content
    }

    #[test]
    fn estimate_message_tokens_tool_use_block() {
        let msg = Message {
            role: Role::Assistant,
            content: vec![ContentBlock::ToolUse {
                id: "call-1".into(),
                name: "search".into(),
                input: json!({"q": "rust"}),
            }],
        };
        let tokens = estimate_message_tokens(&msg);
        // "call-1" → 2, "search" → 2, `{"q":"rust"}` (12 bytes, compact) → 3
        assert_eq!(tokens, 4 + 2 + 2 + 3);
    }

    #[test]
    fn estimate_message_tokens_tool_result_block() {
        let msg = Message {
            role: Role::User,
            content: vec![ContentBlock::ToolResult {
                tool_use_id: "call-1".into(),
                content: "search results here".into(),
                is_error: false,
            }],
        };
        let tokens = estimate_message_tokens(&msg);
        // "call-1" → 2, "search results here" (19 bytes) → 5
        assert_eq!(tokens, 4 + 2 + 5);
    }

    #[test]
    fn estimate_message_tokens_audio_block() {
        let msg = Message {
            role: Role::User,
            content: vec![ContentBlock::Audio {
                format: "ogg".into(),
                data: "a".repeat(1500), // 1500 / 750 + 50 = 52
            }],
        };
        let tokens = estimate_message_tokens(&msg);
        // 4 overhead + 52 audio = 56
        assert_eq!(tokens, 4 + 52);
    }

    #[test]
    fn estimate_message_tokens_small_audio_block() {
        let msg = Message {
            role: Role::User,
            content: vec![ContentBlock::Audio {
                format: "ogg".into(),
                data: "a".repeat(749), // 749 / 750 = 0, so only the 50 overhead
            }],
        };
        assert_eq!(estimate_message_tokens(&msg), 4 + 50);
    }

    #[test]
    fn estimate_message_tokens_multiple_blocks() {
        let msg = Message {
            role: Role::Assistant,
            content: vec![
                ContentBlock::Text {
                    text: "Let me search.".into(),
                },
                ContentBlock::ToolUse {
                    id: "c1".into(),
                    name: "search".into(),
                    input: json!({"q": "test"}),
                },
            ],
        };
        let tokens = estimate_message_tokens(&msg);
        // Should be more than a single text block
        assert!(tokens > estimate_message_tokens(&Message::user("Let me search.")));
        // "Let me search." (14 bytes) → 4, "c1" → 1, "search" → 2,
        // `{"q":"test"}` (12 bytes) → 3; overhead is charged once.
        assert_eq!(tokens, 4 + 4 + 1 + 2 + 3);
    }
}