pub struct Tokenizer { /* private fields */ }
Expand description
토크나이저
형태소 분석의 메인 인터페이스입니다. 시스템 사전, 사용자 사전, 미등록어 처리기를 통합하여 형태소 분석을 수행합니다.
§메모리 최적화
- lattice 재사용으로 매 분석마다 재할당 방지
- pool_manager로 Token, Node 객체 재사용
- String interning으로 중복 문자열 제거
Implementations§
Source§impl Tokenizer
impl Tokenizer
Source pub fn with_user_dict(self, user_dict: UserDictionary) -> Tokenizer
pub fn with_user_dict(self, user_dict: UserDictionary) -> Tokenizer
Source pub fn set_user_dict(&mut self, user_dict: UserDictionary)
pub fn set_user_dict(&mut self, user_dict: UserDictionary)
사용자 사전 설정
Source pub fn with_space_penalty(self, penalty: SpacePenalty) -> Tokenizer
pub fn with_space_penalty(self, penalty: SpacePenalty) -> Tokenizer
Source pub fn tokenize_to_lattice(&mut self, text: &str) -> &Lattice
pub fn tokenize_to_lattice(&mut self, text: &str) -> &Lattice
Source pub const fn dictionary(&self) -> &SystemDictionary
pub const fn dictionary(&self) -> &SystemDictionary
시스템 사전 참조 반환
Source pub fn lattice_stats(&self) -> LatticeStats
pub fn lattice_stats(&self) -> LatticeStats
Lattice 통계 정보 (디버깅용)
Source pub fn pool_stats(&self) -> PoolStats
pub fn pool_stats(&self) -> PoolStats
메모리 풀 통계 정보
메모리 풀의 사용 현황을 반환합니다.
Source pub fn clear_pools(&self)
pub fn clear_pools(&self)
메모리 풀 초기화
모든 풀을 비워 메모리를 해제합니다. 장기 실행 프로세스에서 주기적으로 호출하면 메모리 누수를 방지할 수 있습니다.
Source pub fn set_normalization(
&mut self,
enable: bool,
config: Option<NormalizationConfig>,
) -> Result<(), Error>
pub fn set_normalization( &mut self, enable: bool, config: Option<NormalizationConfig>, ) -> Result<(), Error>
Source pub const fn normalizer(&self) -> Option<&Normalizer>
pub const fn normalizer(&self) -> Option<&Normalizer>
외래어 정규화기 참조 반환
Source pub const fn is_normalization_enabled(&self) -> bool
pub const fn is_normalization_enabled(&self) -> bool
정규화가 활성화되어 있는지 확인
Source pub fn tokenize_with_normalization(&mut self, text: &str) -> Vec<Token>
pub fn tokenize_with_normalization(&mut self, text: &str) -> Vec<Token>
Auto Trait Implementations§
impl !Freeze for Tokenizer
impl !RefUnwindSafe for Tokenizer
impl Send for Tokenizer
impl !Sync for Tokenizer
impl Unpin for Tokenizer
impl UnsafeUnpin for Tokenizer
impl !UnwindSafe for Tokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more