use tantivy::tokenizer::Tokenizer;

use crate::token_stream::TinySegmenterTokenStream;

/// Tokenizer for Japanese text, based on TinySegmenter.
///
/// A zero-sized, `Clone`-able unit struct: all segmentation state lives in
/// the [`TinySegmenterTokenStream`] it produces, so the tokenizer itself
/// carries no data.
#[derive(Clone)]
pub struct TinySegmenterTokenizer;

// NOTE(review): this is the pre-0.9 tantivy `Tokenizer` trait shape
// (lifetime parameter + `TokenStreamImpl` associated type); newer tantivy
// versions use a different signature. Confirm against the version pinned
// in Cargo.toml before upgrading the crate.
impl<'a> Tokenizer<'a> for TinySegmenterTokenizer {
    // Concrete stream type handed back by `token_stream`.
    type TokenStreamImpl = TinySegmenterTokenStream;

    /// Builds a token stream over `text`, delegating segmentation entirely
    /// to [`TinySegmenterTokenStream::new`]. `self` is unused — the
    /// tokenizer holds no configuration.
    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
        TinySegmenterTokenStream::new(text)
    }
}