pub struct ParallelTokenizer<T: Tokenizer + Sync> { /* private fields */ }
Parallel batch tokenization utilities for improved throughput.
Implementations§
Source§impl<T: Tokenizer + Sync> ParallelTokenizer<T>
impl<T: Tokenizer + Sync> ParallelTokenizer<T>
Source§pub fn with_chunk_size(tokenizer: T, chunk_size: usize) -> Self
pub fn with_chunk_size(tokenizer: T, chunk_size: usize) -> Self
Creates a new parallel tokenizer wrapper with a custom chunk size.
Source§pub fn encode_batch(&self, texts: &[&str]) -> Result<Vec<TokenizedInput>>
pub fn encode_batch(&self, texts: &[&str]) -> Result<Vec<TokenizedInput>>
Encodes a batch of texts in parallel.
Source§pub fn encode_pair_batch(
&self,
text_pairs: &[(&str, &str)],
) -> Result<Vec<TokenizedInput>>
pub fn encode_pair_batch(&self, text_pairs: &[(&str, &str)]) -> Result<Vec<TokenizedInput>>
Encodes pairs of texts in parallel.
Source§pub fn decode_batch(&self, ids_batch: &[&[u32]]) -> Result<Vec<String>>
pub fn decode_batch(&self, ids_batch: &[&[u32]]) -> Result<Vec<String>>
Decodes a batch of token ID sequences in parallel.
Source§pub fn chunk_size(&self) -> usize
pub fn chunk_size(&self) -> usize
Returns the current chunk size.
Source§pub fn set_chunk_size(&mut self, chunk_size: usize)
pub fn set_chunk_size(&mut self, chunk_size: usize)
Sets the chunk size used for batching.
Auto Trait Implementations§
impl<T> Freeze for ParallelTokenizer<T>
impl<T> RefUnwindSafe for ParallelTokenizer<T> where
T: RefUnwindSafe,
impl<T> Send for ParallelTokenizer<T>
impl<T> Sync for ParallelTokenizer<T>
impl<T> Unpin for ParallelTokenizer<T>
impl<T> UnsafeUnpin for ParallelTokenizer<T>
impl<T> UnwindSafe for ParallelTokenizer<T> where
T: RefUnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more