#[cfg(feature = "tokenizer-tiktoken")]
use std::sync::Arc;
#[cfg(feature = "tokenizer-tiktoken")]
use entelix_core::Error;
use entelix_core::{Result, TokenCounterRegistry};
/// Builds the token-counter registry this crate ships by default.
///
/// The registry always starts from [`TokenCounterRegistry::new`]. When the
/// `tokenizer-tiktoken` feature is enabled, OpenAI model keys are registered
/// on top of it under the `"openai"` provider:
///
/// * `o200k_base` for `gpt-5`, `gpt-4o`, `chatgpt-4o`, `gpt-4.1`, `gpt-4.5`,
///   `o1`, `o3` and `o4`;
/// * `cl100k_base` for `gpt-4`, `gpt-3.5` and `text-embedding-3`.
///
/// How a key is matched against a concrete model name is decided by
/// `TokenCounterRegistry::resolve`, which is not defined in this file; the
/// unit tests here expect, for example, `gpt-4o-mini` to resolve through the
/// `gpt-4o` entry.
///
/// Without the feature the freshly constructed registry is returned as is.
///
/// # Errors
///
/// With the `tokenizer-tiktoken` feature enabled, returns an error built with
/// `Error::config` if either the `o200k_base` or the `cl100k_base` encoding
/// cannot be loaded.
pub fn default_token_counter_registry() -> Result<TokenCounterRegistry> {
    let registry = TokenCounterRegistry::new();

    #[cfg(feature = "tokenizer-tiktoken")]
    let registry = {
        use entelix_tokenizer_tiktoken::{TiktokenCounter, TiktokenEncoding};

        const PROVIDER: &str = "openai";
        // Registration order is kept stable: all o200k keys first, then cl100k.
        const O200K_KEYS: &[&str] = &[
            "gpt-5",
            "gpt-4o",
            "chatgpt-4o",
            "gpt-4.1",
            "gpt-4.5",
            "o1",
            "o3",
            "o4",
        ];
        const CL100K_KEYS: &[&str] = &["gpt-4", "gpt-3.5", "text-embedding-3"];

        // Each encoding is loaded once and shared between its keys via `Arc`.
        let o200k_counter = TiktokenCounter::for_encoding(TiktokenEncoding::O200kBase)
            .map_err(|e| Error::config(format!("default registry: load o200k_base: {e}")))?;
        let o200k: Arc<dyn entelix_core::TokenCounter> = Arc::new(o200k_counter);

        let cl100k_counter = TiktokenCounter::for_encoding(TiktokenEncoding::Cl100kBase)
            .map_err(|e| Error::config(format!("default registry: load cl100k_base: {e}")))?;
        let cl100k: Arc<dyn entelix_core::TokenCounter> = Arc::new(cl100k_counter);

        let with_o200k = O200K_KEYS.iter().copied().fold(registry, |acc, key| {
            acc.register(PROVIDER, key, Arc::clone(&o200k))
        });
        CL100K_KEYS.iter().copied().fold(with_o200k, |acc, key| {
            acc.register(PROVIDER, key, Arc::clone(&cl100k))
        })
    };

    Ok(registry)
}
#[cfg(test)]
#[cfg(feature = "tokenizer-tiktoken")]
// `expect` is acceptable in tests: a failed registry build should abort the
// test immediately with a readable message.
#[allow(clippy::expect_used)]
mod tests {
    use super::*;

    /// Concrete OpenAI model names paired with the encoding name their
    /// resolved counter must report. The o200k rows come first, then cl100k.
    const OPENAI_ROUTES: [(&str, &str); 11] = [
        ("gpt-5", "o200k_base"),
        ("gpt-4o-mini", "o200k_base"),
        ("chatgpt-4o-latest", "o200k_base"),
        ("o3-mini", "o200k_base"),
        ("o4-mini", "o200k_base"),
        ("gpt-4.1", "o200k_base"),
        ("gpt-4.1-mini", "o200k_base"),
        ("gpt-4.5-preview", "o200k_base"),
        ("gpt-4-turbo", "cl100k_base"),
        ("gpt-3.5-turbo", "cl100k_base"),
        ("text-embedding-3-small", "cl100k_base"),
    ];

    /// Every listed OpenAI model resolves to a registered entry with the
    /// expected encoding, and an unlisted model falls back to the naive
    /// byte counter.
    #[test]
    fn default_registry_routes_openai_models() {
        let registry = default_token_counter_registry().expect("load embedded tables");

        for (model, expected) in OPENAI_ROUTES {
            let resolved = registry.resolve("openai", model);
            assert!(
                resolved.is_match(),
                "{model} should be a registered entry"
            );
            assert_eq!(resolved.counter().encoding_name(), expected, "{model}");
        }

        let unregistered = registry.resolve("openai", "davinci-002");
        assert!(
            unregistered.is_fallback(),
            "unknown model must surface as fallback"
        );
        assert_eq!(
            unregistered.counter().encoding_name(),
            "byte-count-naive"
        );
    }

    /// No Anthropic entries are registered by default, so an Anthropic model
    /// must resolve through the fallback counter.
    #[test]
    fn default_registry_routes_anthropic_through_fallback() {
        let registry = default_token_counter_registry().expect("load embedded tables");

        let resolved = registry.resolve("anthropic", "claude-sonnet-4-5");
        assert!(resolved.is_fallback());
        assert_eq!(resolved.counter().encoding_name(), "byte-count-naive");
    }
}