Skip to main content

Tokenizer

Trait Tokenizer 

Source
pub trait Tokenizer: Encoder + Decoder {
    // Required methods
    fn vocab_size(&self) -> usize;
    fn get_special_tokens(&self) -> &SpecialTokens;
    fn token_to_id(&self, token: &str) -> Option<TokenIdType>;
    fn id_to_token(&self, id: TokenIdType) -> Option<String>;
    fn as_any(&self) -> &dyn Any;

    // Provided methods
    fn apply_chat_template(
        &self,
        _messages: &[Value],
        _params: ChatTemplateParams<'_>,
    ) -> Result<String> { ... }
    fn chat_template_content_format(&self) -> ChatTemplateContentFormat { ... }
    fn thinking_toggle(&self) -> ThinkingToggle { ... }
    fn thinking_key_name(&self) -> Option<ThinkingKeyName> { ... }
    fn think_in_prefill(&self) -> bool { ... }
    fn set_chat_template(&mut self, _template: String) -> Result<()> { ... }
}
Expand description

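Combined tokenizer trait: extends `Encoder` and `Decoder` with vocabulary lookup, special-token access, and chat-template support.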

Required Methods§

Source

fn vocab_size(&self) -> usize

Source

fn get_special_tokens(&self) -> &SpecialTokens

Source

fn token_to_id(&self, token: &str) -> Option<TokenIdType>

Source

fn id_to_token(&self, id: TokenIdType) -> Option<String>

Source

fn as_any(&self) -> &dyn Any

Returns `self` as `&dyn Any` to enable downcasting to concrete types.

Provided Methods§

Source

fn apply_chat_template( &self, _messages: &[Value], _params: ChatTemplateParams<'_>, ) -> Result<String>

Apply the chat template to the given messages. The default implementation, used by tokenizers without template support, returns an error.

Source

fn chat_template_content_format(&self) -> ChatTemplateContentFormat

Get the content format expected by the chat template.

Source

fn thinking_toggle(&self) -> ThinkingToggle

Get the thinking toggle support for this template.

Source

fn thinking_key_name(&self) -> Option<ThinkingKeyName>

The variable name the template uses for the thinking toggle, if any.

Source

fn think_in_prefill(&self) -> bool

Whether the template injects `<think>` into the generation prompt.

Source

fn set_chat_template(&mut self, _template: String) -> Result<()>

Set or override the chat template.

Returns an error if the template fails to parse or the tokenizer does not support chat templates.

Implementors§