pub trait Tokenizer: Send + Sync {
// Required methods
fn encode(&self, text: &str, add_special: bool) -> Result<Vec<TokenId>>;
fn decode(&self, tokens: &[TokenId], skip_special: bool) -> Result<String>;
fn decode_incremental(
&self,
prev: &[TokenId],
next: TokenId,
) -> Result<String>;
fn vocab_size(&self) -> usize;
fn special_tokens(&self) -> &SpecialTokens;
fn token_id(&self, text: &str) -> Option<TokenId>;
fn token_text(&self, token_id: TokenId) -> Option<&str>;
fn info(&self) -> TokenizerInfo;
// Provided methods
fn is_special_token(&self, token_id: TokenId) -> bool { ... }
fn apply_chat_template(&self, messages: &[ChatMessage]) -> Result<String> { ... }
}
Expand description
Core tokenizer trait for encoding/decoding operations
Required Methods§
Sourcefn encode(&self, text: &str, add_special: bool) -> Result<Vec<TokenId>>
fn encode(&self, text: &str, add_special: bool) -> Result<Vec<TokenId>>
Encode text to token IDs
Sourcefn decode(&self, tokens: &[TokenId], skip_special: bool) -> Result<String>
fn decode(&self, tokens: &[TokenId], skip_special: bool) -> Result<String>
Decode token IDs to text
Sourcefn decode_incremental(&self, prev: &[TokenId], next: TokenId) -> Result<String>
fn decode_incremental(&self, prev: &[TokenId], next: TokenId) -> Result<String>
Incremental decode: given the previous tokens and a new token, return only the new text. This is crucial for streaming applications, as it avoids re-decoding all tokens.
Sourcefn vocab_size(&self) -> usize
fn vocab_size(&self) -> usize
Get vocabulary size
Sourcefn special_tokens(&self) -> &SpecialTokens
fn special_tokens(&self) -> &SpecialTokens
Get special tokens configuration
Sourcefn token_id(&self, text: &str) -> Option<TokenId>
fn token_id(&self, text: &str) -> Option<TokenId>
Get the token ID for a specific text (if it exists in the vocabulary)
Sourcefn token_text(&self, token_id: TokenId) -> Option<&str>
fn token_text(&self, token_id: TokenId) -> Option<&str>
Get text for a specific token ID
Sourcefn info(&self) -> TokenizerInfo
fn info(&self) -> TokenizerInfo
Get tokenizer information
Provided Methods§
Sourcefn is_special_token(&self, token_id: TokenId) -> bool
fn is_special_token(&self, token_id: TokenId) -> bool
Check if token is a special token
Sourcefn apply_chat_template(&self, messages: &[ChatMessage]) -> Result<String>
fn apply_chat_template(&self, messages: &[ChatMessage]) -> Result<String>
Apply chat template if supported