pub struct Tokenizer(/* private fields */);
Expand description
Main tokenizer wrapper that provides a unified interface for different tokenizer implementations.
Implementations§
Source§impl Tokenizer
impl Tokenizer
Sourcepub fn from_file_with_chat_template(
file_path: &str,
chat_template_path: Option<&str>,
) -> Result<Tokenizer>
pub fn from_file_with_chat_template( file_path: &str, chat_template_path: Option<&str>, ) -> Result<Tokenizer>
Create a tokenizer from a file path with an optional chat template
Sourcepub fn decode_stream(
&self,
prompt_token_ids: &[u32],
skip_special_tokens: bool,
) -> DecodeStream
pub fn decode_stream( &self, prompt_token_ids: &[u32], skip_special_tokens: bool, ) -> DecodeStream
Create a stateful sequence object for decoding token_ids into text
Sourcepub fn encode(&self, input: &str, add_special_tokens: bool) -> Result<Encoding>
pub fn encode(&self, input: &str, add_special_tokens: bool) -> Result<Encoding>
Direct encode method
Set add_special_tokens to true for embeddings (to add BOS/EOS tokens configured in tokenizer_config.json),
or false for chat completion (where the chat template handles special tokens).
Sourcepub fn encode_batch(
&self,
inputs: &[&str],
add_special_tokens: bool,
) -> Result<Vec<Encoding>>
pub fn encode_batch( &self, inputs: &[&str], add_special_tokens: bool, ) -> Result<Vec<Encoding>>
Direct batch encode method
Set add_special_tokens to true for embeddings (to add BOS/EOS tokens configured in tokenizer_config.json),
or false for chat completion (where the chat template handles special tokens).
Sourcepub fn decode(
&self,
token_ids: &[u32],
skip_special_tokens: bool,
) -> Result<String>
pub fn decode( &self, token_ids: &[u32], skip_special_tokens: bool, ) -> Result<String>
Direct decode method
Sourcepub fn vocab_size(&self) -> usize
pub fn vocab_size(&self) -> usize
Get vocabulary size
Sourcepub fn get_special_tokens(&self) -> &SpecialTokens
pub fn get_special_tokens(&self) -> &SpecialTokens
Get special tokens
Sourcepub fn token_to_id(&self, token: &str) -> Option<u32>
pub fn token_to_id(&self, token: &str) -> Option<u32>
Convert token string to ID
Sourcepub fn id_to_token(&self, id: u32) -> Option<String>
pub fn id_to_token(&self, id: u32) -> Option<String>
Convert ID to token string
Trait Implementations§
Auto Trait Implementations§
impl Freeze for Tokenizer
impl !RefUnwindSafe for Tokenizer
impl Send for Tokenizer
impl Sync for Tokenizer
impl Unpin for Tokenizer
impl UnsafeUnpin for Tokenizer
impl !UnwindSafe for Tokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more