mod api_client;
mod estimation;
mod models;
pub use api_client::ClaudeApiClient;
pub use estimation::{detect_content_type, estimate_tokens, ContentType};
pub use models::claude_models;
use crate::error::TokenError;
use crate::tokenizers::registry::ModelConfig;
use crate::tokenizers::{ModelInfo, TokenDetail, Tokenizer};
use std::env;
/// Tokenizer for Claude models.
///
/// Counts tokens either through the remote API client (when one was
/// configured at construction time) or through the local `estimate_tokens`
/// heuristic.
pub struct ClaudeTokenizer {
    /// Model settings; supplies the name, context window and description
    /// reported by `get_model_info`, and the model name sent to the API client.
    config: ModelConfig,
    /// API client used for counting; `None` means every count is a local estimate.
    api_client: Option<ClaudeApiClient>,
}
impl ClaudeTokenizer {
    /// Builds a tokenizer for the model described by `config`.
    ///
    /// With `use_accurate == false` no API client is created and all counts
    /// are local estimates. With `use_accurate == true` the key is read from
    /// the `ANTHROPIC_API_KEY` environment variable and handed to
    /// `ClaudeApiClient::new`.
    ///
    /// # Errors
    ///
    /// * `TokenError::MissingApiKey` when accurate mode is requested but the
    ///   variable cannot be read or is empty.
    /// * Whatever `ClaudeApiClient::new` returns, converted into `TokenError`.
    pub fn new(config: ModelConfig, use_accurate: bool) -> Result<Self, TokenError> {
        // Estimation-only mode needs no credentials at all.
        if !use_accurate {
            return Ok(Self {
                config,
                api_client: None,
            });
        }

        // Any read failure and an empty value are treated alike: no usable key.
        let api_key = env::var("ANTHROPIC_API_KEY")
            .ok()
            .filter(|value| !value.is_empty())
            .ok_or_else(|| TokenError::MissingApiKey {
                model: config.name.clone(),
            })?;

        let client = ClaudeApiClient::new(api_key)?;
        Ok(Self {
            config,
            api_client: Some(client),
        })
    }

    /// Counts the tokens in `text`, using the API client when one is present.
    ///
    /// A failed API call is not propagated: a warning is written to stderr
    /// and the local estimate is returned instead, so this function always
    /// yields `Ok`.
    async fn count_tokens_async(&self, text: &str) -> anyhow::Result<usize> {
        let client = match self.api_client.as_ref() {
            Some(existing) => existing,
            None => return Ok(estimate_tokens(text)),
        };

        let total = client
            .count_tokens(&self.config.name, text)
            .await
            .unwrap_or_else(|err| {
                eprintln!("Warning: API call failed ({}), falling back to estimation", err);
                estimate_tokens(text)
            });
        Ok(total)
    }
}
impl Tokenizer for ClaudeTokenizer {
    /// Returns the token count for `text`.
    ///
    /// Without an API client this is the local estimate. With one, a
    /// current-thread Tokio runtime is built for this call and used to drive
    /// `count_tokens_async` to completion.
    ///
    /// # Errors
    ///
    /// Fails if the Tokio runtime cannot be built.
    ///
    /// NOTE(review): this blocks on a freshly built runtime, so calling it
    /// from inside an already running Tokio runtime presumably panics —
    /// confirm that callers are synchronous.
    fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
        // Fast path: nothing asynchronous to drive when there is no client.
        if self.api_client.is_none() {
            return Ok(estimate_tokens(text));
        }

        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()?;
        rt.block_on(self.count_tokens_async(text))
    }

    /// Describes the configured model; the encoding label is always
    /// `"anthropic-claude"`.
    fn get_model_info(&self) -> ModelInfo {
        let name = self.config.name.clone();
        let encoding = String::from("anthropic-claude");
        let context_window = self.config.context_window;
        let description = self.config.description.clone();

        ModelInfo {
            name,
            encoding,
            context_window,
            description,
        }
    }

    /// Per-token details are not produced by this tokenizer; always `Ok(None)`.
    fn encode_with_details(&self, _text: &str) -> anyhow::Result<Option<Vec<TokenDetail>>> {
        Ok(None)
    }
}