pub struct TokenizerBridge { /* private fields */ }
Tokenizer bridge wrapping the HuggingFace tokenizers library.
Implementations§
Source§impl TokenizerBridge
impl TokenizerBridge
Source pub fn from_file(path: &str) -> RuntimeResult<Self>
pub fn from_file(path: &str) -> RuntimeResult<Self>
Load a tokenizer from a JSON file path.
Source pub fn from_bytes(json: &[u8]) -> RuntimeResult<Self>
pub fn from_bytes(json: &[u8]) -> RuntimeResult<Self>
Create a tokenizer from JSON bytes (e.g., from GGUF metadata).
Source pub fn decode(&self, tokens: &[u32]) -> RuntimeResult<String>
pub fn decode(&self, tokens: &[u32]) -> RuntimeResult<String>
Decode token IDs back to text.
Source pub fn vocab_size(&self) -> usize
pub fn vocab_size(&self) -> usize
Get the vocabulary size.
Source pub fn bos_token_id(&self) -> Option<u32>
pub fn bos_token_id(&self) -> Option<u32>
Get the BOS (beginning of sequence) token ID, if any.
Source pub fn eos_token_id(&self) -> Option<u32>
pub fn eos_token_id(&self) -> Option<u32>
Get the EOS (end of sequence) token ID, if any.
Source pub fn id_to_token(&self, id: u32) -> Option<String>
pub fn id_to_token(&self, id: u32) -> Option<String>
Get the string representation of a single token ID.
Returns None if the id is not in the vocabulary.
Source pub fn token_to_bytes(&self, id: u32) -> Option<Vec<u8>>
pub fn token_to_bytes(&self, id: u32) -> Option<Vec<u8>>
Get the byte representation of a single token ID.
Uses decode (skip_special_tokens=false) to produce the canonical bytes
for byte-level BPE tokenizers. Returns None if the id is unknown.
Source pub fn vocab_bytes(&self) -> Vec<(u32, Vec<u8>)>
pub fn vocab_bytes(&self) -> Vec<(u32, Vec<u8>)>
Build the full vocab as (token_id, byte_representation) pairs.
This is used to pre-compute the vocabulary for grammar masking. The result can be cached and shared across generation steps.
Source pub fn vocab_bytes_cached(&self) -> &[(u32, Vec<u8>)]
pub fn vocab_bytes_cached(&self) -> &[(u32, Vec<u8>)]
Get the cached vocabulary bytes. Computes them on the first call and returns the cached result thereafter.
Auto Trait Implementations§
impl !Freeze for TokenizerBridge
impl RefUnwindSafe for TokenizerBridge
impl Send for TokenizerBridge
impl Sync for TokenizerBridge
impl Unpin for TokenizerBridge
impl UnsafeUnpin for TokenizerBridge
impl UnwindSafe for TokenizerBridge
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more