m2m/tokenizer/mod.rs
1//! Token counting utilities.
2//!
3//! This module provides accurate token counting using tiktoken encodings
4//! for OpenAI-compatible models, with a heuristic fallback for others.
5//!
6//! # Supported Encodings
7//!
8//! - **cl100k_base**: GPT-3.5, GPT-4, Claude (approximate)
9//! - **o200k_base**: GPT-4o, o1, o3 models
10//! - **heuristic**: fallback estimate of ~4 characters per token
11//!
12//! # Example
13//!
14//! ```
15//! use m2m::tokenizer::{count_tokens, count_tokens_with_encoding};
16//! use m2m::models::Encoding;
17//!
18//! // Count with the default encoding (cl100k_base)
19//! let tokens = count_tokens("Hello, world!");
20//! println!("Token count: {}", tokens);
21//!
22//! // Count with specific encoding
23//! let tokens = count_tokens_with_encoding("Hello, world!", Encoding::O200kBase);
24//! println!("Token count (o200k): {}", tokens);
25//! ```
26
27mod counter;
28
29pub use counter::{
30 count_tokens, count_tokens_for_model, count_tokens_with_encoding, estimate_savings,
31 TokenCounter,
32};