use otherone_ai::types::Message;
/// Estimates the total token count of a list of messages.
///
/// Each message is handed to the estimator matching its role (`user`,
/// `assistant`, `system`, `tool` or `developer`). A message with any other
/// role contributes only when its content is plain text; otherwise it adds
/// nothing.
///
/// Returns `0` when `messages` is empty.
pub fn estimate_tokens(messages: &[Message]) -> u32 {
    use otherone_ai::types::MessageContent;

    messages
        .iter()
        .map(|msg| match msg.role.as_str() {
            "user" => estimate_user_message_tokens(msg),
            "assistant" => estimate_assistant_message_tokens(msg),
            "system" => estimate_system_message_tokens(msg),
            "tool" => estimate_tool_message_tokens(msg),
            "developer" => estimate_developer_message_tokens(msg),
            // Unrecognised role: only plain-text content is counted.
            _ => match &msg.content {
                MessageContent::Text(body) => estimate_content_tokens(body),
                _ => 0,
            },
        })
        .sum()
}
/// Estimates the tokens of a `user` message.
///
/// Plain-text content is measured with `estimate_content_tokens`. For
/// multi-part content every part is charged according to its `content_type`:
///
/// * `text` — the estimate of its text, or nothing when the text is absent;
/// * `image_url`, `video_url`, `input_audio` — a fixed flat cost each;
/// * anything else — nothing.
fn estimate_user_message_tokens(message: &Message) -> u32 {
    use otherone_ai::types::MessageContent;

    // Flat per-part costs, carried over unchanged from the existing heuristic.
    let image_cost = (5000_f64 / 4.0).ceil() as u32;
    let video_cost: u32 = 300 * 263;
    let audio_cost: u32 = 480 * 32;

    match &message.content {
        MessageContent::Text(body) => estimate_content_tokens(body),
        MessageContent::MultiPart(parts) => parts
            .iter()
            .map(|part| {
                if part.content_type == "text" {
                    match part.text {
                        Some(ref body) => estimate_content_tokens(body),
                        None => 0,
                    }
                } else if part.content_type == "image_url" {
                    image_cost
                } else if part.content_type == "video_url" {
                    video_cost
                } else if part.content_type == "input_audio" {
                    audio_cost
                } else {
                    0
                }
            })
            .sum::<u32>(),
    }
}
/// Estimates the tokens of an `assistant` message.
///
/// The result is the sum of two parts:
///
/// * the content — plain text is measured directly; in multi-part content
///   `text` parts are measured by their text (nothing when absent), while
///   `image_url` and `audio` parts are charged a fixed flat cost and every
///   other part type is free;
/// * the tool calls, if any — the function name and the argument string of
///   each call are both measured as text.
fn estimate_assistant_message_tokens(message: &Message) -> u32 {
    use otherone_ai::types::MessageContent;

    // Flat per-part costs, carried over unchanged from the existing heuristic.
    let image_cost = (5000_f64 / 4.0).ceil() as u32;
    let audio_cost: u32 = 480 * 32;

    let content_tokens: u32 = match &message.content {
        MessageContent::Text(body) => estimate_content_tokens(body),
        MessageContent::MultiPart(parts) => parts
            .iter()
            .map(|part| {
                if part.content_type == "text" {
                    match part.text {
                        Some(ref body) => estimate_content_tokens(body),
                        None => 0,
                    }
                } else if part.content_type == "image_url" {
                    image_cost
                } else if part.content_type == "audio" {
                    audio_cost
                } else {
                    0
                }
            })
            .sum(),
    };

    // `Option::iter` yields the list zero or one time, so a missing list
    // simply contributes nothing.
    let tool_call_tokens: u32 = message
        .tool_calls
        .iter()
        .flatten()
        .map(|call| {
            estimate_content_tokens(&call.function.name)
                + estimate_content_tokens(&call.function.arguments)
        })
        .sum();

    content_tokens + tool_call_tokens
}
/// Estimates the tokens of a `system` message.
///
/// Plain-text content is measured with `estimate_content_tokens`. Multi-part
/// content contributes the sum of its `text` parts (parts without text, and
/// parts of any other type, add nothing), so a system prompt supplied as
/// parts is not silently estimated as zero tokens.
fn estimate_system_message_tokens(message: &Message) -> u32 {
    // Both variants are listed explicitly (no catch-all) so that a newly
    // added content variant forces this estimator to be revisited.
    match &message.content {
        otherone_ai::types::MessageContent::Text(text) => estimate_content_tokens(text),
        otherone_ai::types::MessageContent::MultiPart(parts) => parts
            .iter()
            .filter(|part| part.content_type == "text")
            .filter_map(|part| part.text.as_ref())
            .map(|text| estimate_content_tokens(text))
            .sum(),
    }
}
/// Estimates the tokens of a `tool` message.
///
/// Plain-text content is measured directly; in multi-part content only
/// `text` parts that actually carry text are counted. When the message has a
/// `name`, its estimate is added on top.
fn estimate_tool_message_tokens(message: &Message) -> u32 {
    use otherone_ai::types::MessageContent;

    let body_tokens: u32 = match &message.content {
        MessageContent::Text(body) => estimate_content_tokens(body),
        MessageContent::MultiPart(parts) => parts
            .iter()
            .filter(|part| part.content_type == "text")
            .filter_map(|part| part.text.as_ref())
            .map(|body| estimate_content_tokens(body))
            .sum(),
    };

    let name_tokens = message
        .name
        .as_ref()
        .map_or(0, |name| estimate_content_tokens(name));

    body_tokens + name_tokens
}
/// Estimates the tokens of a `developer` message.
///
/// Plain-text content is measured with `estimate_content_tokens`. Multi-part
/// content contributes the sum of its `text` parts (parts without text, and
/// parts of any other type, add nothing), so developer instructions supplied
/// as parts are not silently estimated as zero tokens.
fn estimate_developer_message_tokens(message: &Message) -> u32 {
    // Both variants are listed explicitly (no catch-all) so that a newly
    // added content variant forces this estimator to be revisited.
    match &message.content {
        otherone_ai::types::MessageContent::Text(text) => estimate_content_tokens(text),
        otherone_ai::types::MessageContent::MultiPart(parts) => parts
            .iter()
            .filter(|part| part.content_type == "text")
            .filter_map(|part| part.text.as_ref())
            .map(|text| estimate_content_tokens(text))
            .sum(),
    }
}
/// Roughly estimates how many tokens a piece of text occupies.
///
/// Characters in the range `U+4E00..=U+9FA5` are charged at 1.5 characters
/// per token and every other character at 4 characters per token. Each group
/// is rounded up on its own and the two results are added together.
///
/// Returns `0` for an empty string.
fn estimate_content_tokens(content: &str) -> u32 {
    // Classify every character in a single pass.
    let (ideographs, others) =
        content
            .chars()
            .fold((0usize, 0usize), |(ideographs, others), ch| {
                if matches!(ch, '\u{4e00}'..='\u{9fa5}') {
                    (ideographs + 1, others)
                } else {
                    (ideographs, others + 1)
                }
            });

    let tokens_for =
        |count: usize, chars_per_token: f64| (count as f64 / chars_per_token).ceil() as u32;

    tokens_for(ideographs, 1.5) + tokens_for(others, 4.0)
}
// Unit tests for the token estimators. Expected values are pinned exactly so
// that a change to the per-character ratios or to the rounding is caught,
// rather than only checking that the estimate is non-zero.
#[cfg(test)]
mod tests {
    use super::*;
    use otherone_ai::types::MessageContent;

    /// Two ideographs at 1.5 characters per token round up to two tokens.
    #[test]
    fn test_estimate_content_tokens_chinese() {
        assert_eq!(estimate_content_tokens("你好"), 2);
    }

    /// Five non-ideograph characters at 4 per token round up to two tokens.
    #[test]
    fn test_estimate_content_tokens_english() {
        assert_eq!(estimate_content_tokens("hello"), 2);
    }

    /// The two character groups are rounded up separately and then added:
    /// six other characters give 2 tokens, two ideographs give 2 tokens.
    #[test]
    fn test_estimate_content_tokens_mixed() {
        assert_eq!(estimate_content_tokens("hello 你好"), 4);
    }

    /// An empty string costs nothing.
    #[test]
    fn test_estimate_content_tokens_empty() {
        assert_eq!(estimate_content_tokens(""), 0);
    }

    /// An empty message list costs nothing.
    #[test]
    fn test_estimate_tokens_empty_messages() {
        assert_eq!(estimate_tokens(&[]), 0);
    }

    /// A plain-text system message is measured by its text:
    /// 28 characters at 4 per token is exactly 7 tokens.
    #[test]
    fn test_estimate_tokens_system_message() {
        let messages = vec![otherone_ai::types::Message {
            role: "system".to_string(),
            content: MessageContent::Text("You are a helpful assistant.".to_string()),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        }];
        assert_eq!(estimate_tokens(&messages), 7);
    }
}