// token_count/tokenizers/claude/mod.rs
mod api_client;
8mod estimation;
9mod models;
10
11pub use api_client::ClaudeApiClient;
12pub use estimation::{detect_content_type, estimate_tokens, ContentType};
13pub use models::claude_models;
14
15use crate::error::TokenError;
16use crate::tokenizers::registry::ModelConfig;
17use crate::tokenizers::{ModelInfo, Tokenizer};
18use std::env;
19
20pub struct ClaudeTokenizer {
22 config: ModelConfig,
24
25 api_client: Option<ClaudeApiClient>,
27}
28
29impl ClaudeTokenizer {
30 pub fn new(config: ModelConfig, use_accurate: bool) -> Result<Self, TokenError> {
40 let api_client = if use_accurate {
41 match env::var("ANTHROPIC_API_KEY") {
43 Ok(key) if !key.is_empty() => Some(ClaudeApiClient::new(key)?),
44 _ => return Err(TokenError::MissingApiKey { model: config.name.clone() }),
45 }
46 } else {
47 None
48 };
49
50 Ok(Self { config, api_client })
51 }
52
53 async fn count_tokens_async(&self, text: &str) -> anyhow::Result<usize> {
59 if let Some(client) = &self.api_client {
60 match client.count_tokens(&self.config.name, text).await {
62 Ok(count) => Ok(count),
63 Err(e) => {
64 eprintln!("Warning: API call failed ({}), falling back to estimation", e);
65 Ok(estimate_tokens(text))
66 }
67 }
68 } else {
69 Ok(estimate_tokens(text))
71 }
72 }
73}
74
75impl Tokenizer for ClaudeTokenizer {
76 fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
77 if self.api_client.is_some() {
80 let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build()?;
81 runtime.block_on(self.count_tokens_async(text))
82 } else {
83 Ok(estimate_tokens(text))
85 }
86 }
87
88 fn get_model_info(&self) -> ModelInfo {
89 ModelInfo {
90 name: self.config.name.clone(),
91 encoding: "anthropic-claude".to_string(),
92 context_window: self.config.context_window,
93 description: self.config.description.clone(),
94 }
95 }
96}