pub struct StreamingTokenizer { /* private fields */ }
스트리밍 토크나이저
대용량 텍스트를 청크 단위로 처리하며, 문장 경계를 고려하여 올바른 토큰화를 보장합니다.
Implementations§
impl StreamingTokenizer
pub const DEFAULT_CHUNK_SIZE: usize = 8192
기본 청크 크기 (8KB)
pub fn new(tokenizer: Tokenizer) -> StreamingTokenizer
pub fn with_chunk_size(self, size: usize) -> StreamingTokenizer
pub fn with_sentence_delimiters(self, delimiters: Vec<char>) -> StreamingTokenizer
pub fn process_chunk(&mut self, chunk: &str) -> Vec<Token>
pub fn buffer_len(&self) -> usize
버퍼 크기 확인
pub const fn total_chars_processed(&self) -> usize
처리된 문자 수
Auto Trait Implementations§
impl !Freeze for StreamingTokenizer
impl !RefUnwindSafe for StreamingTokenizer
impl Send for StreamingTokenizer
impl !Sync for StreamingTokenizer
impl Unpin for StreamingTokenizer
impl UnsafeUnpin for StreamingTokenizer
impl !UnwindSafe for StreamingTokenizer
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more