pub struct StreamingTokenizer<T: Tokenizer> { /* private fields */ }
Streaming tokenizer for processing large texts efficiently.
Implementations§
impl<T: Tokenizer> StreamingTokenizer<T>
pub fn with_buffer_size(self, buffer_size: usize) -> Self
Set the buffer size for reading from the stream.
pub fn with_overlap_size(self, overlap_size: usize) -> Self
Set the overlap size between chunks.
pub fn with_max_chunk_length(self, max_length: usize) -> Self
Set the maximum chunk length for tokenization.
pub fn process_stream<R: Read>(&self, reader: R) -> Result<Vec<TokenizedInput>>
Process a stream of text and return the tokenized chunks.
pub fn process_text(&self, text: &str) -> Result<Vec<TokenizedInput>>
Process text from a string in a streaming fashion.
pub fn process_lines<I>(&self, lines: I) -> Result<Vec<TokenizedInput>>
Process an iterator of text lines.
pub fn buffer_size(&self) -> usize
Get the buffer size.
pub fn overlap_size(&self) -> usize
Get the overlap size.
pub fn max_chunk_length(&self) -> Option<usize>
Get the maximum chunk length, if one is set.
Auto Trait Implementations§
impl<T> Freeze for StreamingTokenizer<T> where T: Freeze
impl<T> RefUnwindSafe for StreamingTokenizer<T> where T: RefUnwindSafe
impl<T> Send for StreamingTokenizer<T>
impl<T> Sync for StreamingTokenizer<T>
impl<T> Unpin for StreamingTokenizer<T> where T: Unpin
impl<T> UnsafeUnpin for StreamingTokenizer<T> where T: UnsafeUnpin
impl<T> UnwindSafe for StreamingTokenizer<T> where T: UnwindSafe
Blanket Implementations§
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise.