// token_count/tokenizers/claude/mod.rs
//! Tokenizer implementation for Anthropic Claude models
//!
//! This module provides tokenization for Claude models using a hybrid approach:
//! - **Default**: Adaptive estimation based on content type (code vs. prose)
//! - **Optional**: Exact counting via Anthropic API (requires API key and --accurate flag)

mod api_client;
mod estimation;
mod models;

pub use api_client::ClaudeApiClient;
pub use estimation::{detect_content_type, estimate_tokens, ContentType};
pub use models::claude_models;

use crate::error::TokenError;
use crate::tokenizers::registry::ModelConfig;
use crate::tokenizers::{ModelInfo, Tokenizer};
use std::env;

20/// Tokenizer for Anthropic Claude models
21pub struct ClaudeTokenizer {
22    /// Model configuration (name, context window, etc.)
23    config: ModelConfig,
24
25    /// Optional API client (only if --accurate flag set and API key available)
26    api_client: Option<ClaudeApiClient>,
27}
28
29impl ClaudeTokenizer {
30    /// Create a new Claude tokenizer
31    ///
32    /// # Arguments
33    /// * `config` - Model configuration (name, context window, etc.)
34    /// * `use_accurate` - Whether to use API for exact counts (--accurate flag)
35    ///
36    /// # Returns
37    /// * `Ok(Self)` - Successfully created tokenizer
38    /// * `Err(TokenError::MissingApiKey)` - API key required but not found
39    pub fn new(config: ModelConfig, use_accurate: bool) -> Result<Self, TokenError> {
40        let api_client = if use_accurate {
41            // Check for API key
42            match env::var("ANTHROPIC_API_KEY") {
43                Ok(key) if !key.is_empty() => Some(ClaudeApiClient::new(key)?),
44                _ => return Err(TokenError::MissingApiKey { model: config.name.clone() }),
45            }
46        } else {
47            None
48        };
49
50        Ok(Self { config, api_client })
51    }
52
53    /// Count tokens using estimation or API (async helper)
54    ///
55    /// This method handles the hybrid approach:
56    /// - If API client available, try API call with graceful fallback to estimation
57    /// - Otherwise, use estimation
58    async fn count_tokens_async(&self, text: &str) -> anyhow::Result<usize> {
59        if let Some(client) = &self.api_client {
60            // Try API, fall back to estimation on error
61            match client.count_tokens(&self.config.name, text).await {
62                Ok(count) => Ok(count),
63                Err(e) => {
64                    eprintln!("Warning: API call failed ({}), falling back to estimation", e);
65                    Ok(estimate_tokens(text))
66                }
67            }
68        } else {
69            // Estimation mode
70            Ok(estimate_tokens(text))
71        }
72    }
73}
74
75impl Tokenizer for ClaudeTokenizer {
76    fn count_tokens(&self, text: &str) -> anyhow::Result<usize> {
77        // Create a simple tokio runtime for the async API call
78        // This is necessary because the Tokenizer trait is sync
79        if self.api_client.is_some() {
80            let runtime = tokio::runtime::Builder::new_current_thread().enable_all().build()?;
81            runtime.block_on(self.count_tokens_async(text))
82        } else {
83            // Pure estimation (no async needed)
84            Ok(estimate_tokens(text))
85        }
86    }
87
88    fn get_model_info(&self) -> ModelInfo {
89        ModelInfo {
90            name: self.config.name.clone(),
91            encoding: "anthropic-claude".to_string(),
92            context_window: self.config.context_window,
93            description: self.config.description.clone(),
94        }
95    }
96}