[−][src]Struct tantivy::tokenizer::NgramTokenizer
Tokenizes the text by splitting words into n-grams of the given size(s).
With this tokenizer, the position
is always 0.
Beware, however: in the presence of multiple values for the same field,
the position will be POSITION_GAP * index of value.
Example 1: hello
would be tokenized as (min_gram: 2, max_gram: 3, prefix_only: false)
Term | he | hel | el | ell | ll | llo | lo |
---|---|---|---|---|---|---|---|
Position | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Offsets | 0,2 | 0,3 | 1,3 | 1,4 | 2,4 | 2,5 | 3,5 |
Example 2: hello
would be tokenized as (min_gram: 2, max_gram: 5, prefix_only: true)
Term | he | hel | hell | hello |
---|---|---|---|---|
Position | 0 | 0 | 0 | 0 |
Offsets | 0,2 | 0,3 | 0,4 | 0,5 |
Example 3: hεllo
(non-ASCII) would be tokenized as (min_gram: 2, max_gram: 5, prefix_only: true)
Term | hε | hεl | hεll | hεllo |
---|---|---|---|---|
Position | 0 | 0 | 0 | 0 |
Offsets | 0,3 | 0,4 | 0,5 | 0,6 |
Example
use tantivy::tokenizer::*; let tokenizer = NgramTokenizer::new(2, 3, false); let mut stream = tokenizer.token_stream("hello"); { let token = stream.next().unwrap(); assert_eq!(token.text, "he"); assert_eq!(token.offset_from, 0); assert_eq!(token.offset_to, 2); } { let token = stream.next().unwrap(); assert_eq!(token.text, "hel"); assert_eq!(token.offset_from, 0); assert_eq!(token.offset_to, 3); } { let token = stream.next().unwrap(); assert_eq!(token.text, "el"); assert_eq!(token.offset_from, 1); assert_eq!(token.offset_to, 3); } { let token = stream.next().unwrap(); assert_eq!(token.text, "ell"); assert_eq!(token.offset_from, 1); assert_eq!(token.offset_to, 4); } { let token = stream.next().unwrap(); assert_eq!(token.text, "ll"); assert_eq!(token.offset_from, 2); assert_eq!(token.offset_to, 4); } { let token = stream.next().unwrap(); assert_eq!(token.text, "llo"); assert_eq!(token.offset_from, 2); assert_eq!(token.offset_to, 5); } { let token = stream.next().unwrap(); assert_eq!(token.text, "lo"); assert_eq!(token.offset_from, 3); assert_eq!(token.offset_to, 5); } assert!(stream.next().is_none());
Methods
impl NgramTokenizer
[src]
pub fn new(
min_gram: usize,
max_gram: usize,
prefix_only: bool
) -> NgramTokenizer
[src]
min_gram: usize,
max_gram: usize,
prefix_only: bool
) -> NgramTokenizer
Configures a new Ngram tokenizer
pub fn all_ngrams(min_gram: usize, max_gram: usize) -> NgramTokenizer
[src]
Creates an NgramTokenizer
which generates tokens for all inner ngrams.
This is as opposed to only prefix ngrams.
pub fn prefix_only(min_gram: usize, max_gram: usize) -> NgramTokenizer
[src]
Creates an NgramTokenizer
which only generates tokens for the
prefix ngrams.
Trait Implementations
impl<'a> Tokenizer<'a> for NgramTokenizer
[src]
type TokenStreamImpl = NgramTokenStream<'a>
Type associated with the resulting token stream.
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl
[src]
fn filter<NewFilter>(
self,
new_filter: NewFilter
) -> ChainTokenizer<NewFilter, Self> where
NewFilter: TokenFilter<Self::TokenStreamImpl>,
[src]
self,
new_filter: NewFilter
) -> ChainTokenizer<NewFilter, Self> where
NewFilter: TokenFilter<Self::TokenStreamImpl>,
impl Clone for NgramTokenizer
[src]
fn clone(&self) -> NgramTokenizer
[src]
fn clone_from(&mut self, source: &Self)
1.0.0[src]
Auto Trait Implementations
impl Send for NgramTokenizer
impl Sync for NgramTokenizer
impl Unpin for NgramTokenizer
impl UnwindSafe for NgramTokenizer
impl RefUnwindSafe for NgramTokenizer
Blanket Implementations
impl<T> Fruit for T where
T: Send + Downcast,
[src]
T: Send + Downcast,
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> From<T> for T
[src]
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
fn to_owned(&self) -> T
[src]
fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Erased for T
[src]
impl<T> Downcast for T where
T: Any,
[src]
T: Any,
fn into_any(self: Box<T>) -> Box<dyn Any + 'static>
[src]
fn into_any_rc(self: Rc<T>) -> Rc<dyn Any + 'static>
[src]
fn as_any(&self) -> &(dyn Any + 'static)
[src]
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
[src]
impl<T> DowncastSync for T where
T: Send + Sync + Any,
[src]
T: Send + Sync + Any,