use crate::llm::message::{ContentBlock, Message};
/// Rough bytes-per-token ratio used for heuristic estimation when no
/// API-reported usage is available.
const BYTES_PER_TOKEN: f64 = 4.0;
/// Flat per-image token cost; images carry no countable text bytes.
const IMAGE_TOKEN_ESTIMATE: u64 = 2000;
/// Estimate the token count of `content` from its byte length.
///
/// Applies the crude "~4 bytes per token" rule of thumb and rounds to
/// the nearest whole token.
pub fn estimate_tokens(content: &str) -> u64 {
    let byte_len = content.len() as f64;
    (byte_len / BYTES_PER_TOKEN).round() as u64
}
/// Estimate the token footprint of a single content block.
///
/// Text-like variants are estimated from their byte length; image blocks
/// get a flat `IMAGE_TOKEN_ESTIMATE` since they have no countable text.
pub fn estimate_block_tokens(block: &ContentBlock) -> u64 {
    match block {
        ContentBlock::Text { text } => estimate_tokens(text),
        ContentBlock::Thinking { thinking, .. } => estimate_tokens(thinking),
        ContentBlock::ToolResult { content, .. } => estimate_tokens(content),
        ContentBlock::ToolUse { name, input, .. } => {
            // Tool input is JSON on the wire, so estimate the serialized form.
            let serialized = serde_json::to_string(input).unwrap_or_default();
            estimate_tokens(name) + estimate_tokens(&serialized)
        }
        ContentBlock::Image { .. } => IMAGE_TOKEN_ESTIMATE,
    }
}
/// Fixed per-message token overhead (role marker plus message framing).
/// Hoisted to a named const — previously duplicated as a magic `4` in
/// every match arm.
const MESSAGE_OVERHEAD_TOKENS: u64 = 4;
/// Estimate the token footprint of one message: the sum of its content
/// blocks (or the raw system text) plus a fixed per-message overhead.
pub fn estimate_message_tokens(msg: &Message) -> u64 {
    let content_tokens = match msg {
        Message::User(u) => u.content.iter().map(estimate_block_tokens).sum(),
        Message::Assistant(a) => a.content.iter().map(estimate_block_tokens).sum(),
        Message::System(s) => estimate_tokens(&s.content),
    };
    MESSAGE_OVERHEAD_TOKENS + content_tokens
}
/// Estimate the total token count of a conversation.
///
/// Anchors on the most recent assistant message that carries API-reported
/// usage (the authoritative count for everything up to that point), then
/// adds heuristic estimates for every message after it. With no usage
/// anchor, the whole transcript is estimated heuristically.
pub fn estimate_context_tokens(messages: &[Message]) -> u64 {
    // Capture the usage reference directly during the reverse scan, so we
    // never have to re-look-up the message and `unwrap()` afterwards.
    let anchor = messages.iter().enumerate().rev().find_map(|(i, msg)| {
        if let Message::Assistant(a) = msg {
            a.usage.as_ref().map(|usage| (i, usage))
        } else {
            None
        }
    });
    match anchor {
        Some((idx, usage)) => {
            // API-reported total covers messages[..=idx]; estimate the rest.
            let new_tokens: u64 = messages[idx + 1..]
                .iter()
                .map(estimate_message_tokens)
                .sum();
            usage.total() + new_tokens
        }
        // No usage anywhere (including the empty transcript, which sums to 0):
        // estimate everything.
        None => messages.iter().map(estimate_message_tokens).sum(),
    }
}
/// Return the context-window size (in tokens) for a model, inferred from
/// substrings of its (case-insensitive) name.
///
/// Explicit long-context markers ("1m" / "1000k") take precedence over
/// family defaults; unrecognized models fall back to 128k.
pub fn context_window_for_model(model: &str) -> u64 {
    let lower = model.to_lowercase();
    if lower.contains("1m") || lower.contains("1000k") {
        1_000_000
    } else if ["opus", "sonnet", "haiku"].iter().any(|f| lower.contains(f)) {
        200_000
    } else if lower.contains("gpt-4") {
        128_000
    } else if lower.contains("gpt-3.5") {
        16_384
    } else {
        128_000
    }
}
/// Return the maximum output-token budget for a model, inferred from
/// substrings of its (case-insensitive) name.
///
/// Opus/Sonnet and unknown models get 16k; Haiku is capped at 8k.
pub fn max_output_tokens_for_model(model: &str) -> u64 {
    let lower = model.to_lowercase();
    // Opus/Sonnet are checked first so a name matching both families
    // resolves the same way as before.
    if lower.contains("opus") || lower.contains("sonnet") {
        return 16_384;
    }
    if lower.contains("haiku") {
        return 8_192;
    }
    16_384
}
/// Return the maximum thinking-token budget for a model, inferred from
/// substrings of its (case-insensitive) name.
///
/// Opus 32k, Sonnet 16k, Haiku 8k; unknown models default to 16k.
pub fn max_thinking_tokens_for_model(model: &str) -> u64 {
    let lower = model.to_lowercase();
    // First matching family wins; order mirrors the capability tiers.
    let tiers: [(&str, u64); 3] = [("opus", 32_000), ("sonnet", 16_000), ("haiku", 8_000)];
    tiers
        .iter()
        .find(|(family, _)| lower.contains(family))
        .map(|&(_, budget)| budget)
        .unwrap_or(16_000)
}
/// Private helper for borrowing the assistant payload out of a [`Message`].
trait AsAssistant {
    /// Return the inner `AssistantMessage` if this is an assistant message.
    fn as_assistant(&self) -> Option<&crate::llm::message::AssistantMessage>;
}
impl AsAssistant for Message {
    fn as_assistant(&self) -> Option<&crate::llm::message::AssistantMessage> {
        if let Message::Assistant(a) = self {
            Some(a)
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // 100 bytes at ~4 bytes/token should estimate to 25 tokens.
    #[test]
    fn test_estimate_tokens() {
        let text = "abcd".repeat(25);
        assert_eq!(estimate_tokens(&text), 25);
    }

    // An empty transcript must estimate to a zero-token context.
    #[test]
    fn test_empty_messages() {
        let empty: Vec<Message> = Vec::new();
        assert_eq!(estimate_context_tokens(&empty), 0);
    }
}