Struct tantivy::tokenizer::NgramTokenizer
pub struct NgramTokenizer { /* fields omitted */ }
Tokenizes the text by splitting words into n-grams of the given size(s).

With this tokenizer, the position is always 0. Beware, however: in the presence of multiple values for the same field, the position will be `POSITION_GAP` * the index of the value.
Example 1: `hello` would be tokenized as (min_gram: 2, max_gram: 3, prefix_only: false):

| Term | he | hel | el | ell | ll | llo | lo |
|---|---|---|---|---|---|---|---|
| Position | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Offsets | 0,2 | 0,3 | 1,3 | 1,4 | 2,4 | 2,5 | 3,5 |
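The enumeration behind the table above can be reproduced with a small standalone sketch. Note this is an illustration of the behavior, not tantivy's actual implementation; the function name `all_ngrams` is chosen here for clarity. Offsets are byte offsets, so the sketch walks char boundaries rather than byte indices.

```rust
// Illustrative sketch (not tantivy's code): enumerate all inner n-grams of
// length min_gram..=max_gram (in chars), reporting byte offsets.
fn all_ngrams(text: &str, min_gram: usize, max_gram: usize) -> Vec<(String, usize, usize)> {
    // Byte index of every char boundary, plus the end of the string.
    let mut boundaries: Vec<usize> = text.char_indices().map(|(i, _)| i).collect();
    boundaries.push(text.len());
    let n_chars = boundaries.len() - 1;

    let mut out = Vec::new();
    for start in 0..n_chars {
        for len in min_gram..=max_gram {
            if start + len > n_chars {
                break;
            }
            let (from, to) = (boundaries[start], boundaries[start + len]);
            out.push((text[from..to].to_string(), from, to));
        }
    }
    out
}

fn main() {
    // Matches the (min_gram: 2, max_gram: 3, prefix_only: false) table:
    // he 0,2 / hel 0,3 / el 1,3 / ell 1,4 / ll 2,4 / llo 2,5 / lo 3,5
    for (gram, from, to) in all_ngrams("hello", 2, 3) {
        println!("{} {},{}", gram, from, to);
    }
}
```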
Example 2: `hello` would be tokenized as (min_gram: 2, max_gram: 5, prefix_only: true):

| Term | he | hel | hell | hello |
|---|---|---|---|---|
| Position | 0 | 0 | 0 | 0 |
| Offsets | 0,2 | 0,3 | 0,4 | 0,5 |
Example 3: `hεllo` (non-ASCII) would be tokenized as (min_gram: 2, max_gram: 5, prefix_only: true). Note that the offsets are byte offsets: `ε` occupies two bytes in UTF-8.

| Term | hε | hεl | hεll | hεllo |
|---|---|---|---|---|
| Position | 0 | 0 | 0 | 0 |
| Offsets | 0,3 | 0,4 | 0,5 | 0,6 |
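The byte-offset behavior for prefix n-grams can likewise be sketched in standalone Rust (again an illustration, not tantivy's implementation; `prefix_ngrams` is a name chosen here). Slicing at char boundaries is what makes `hε` span bytes 0..3.

```rust
// Illustrative sketch (not tantivy's code): generate only prefix n-grams of
// length min_gram..=max_gram (in chars), reporting byte offsets.
fn prefix_ngrams(text: &str, min_gram: usize, max_gram: usize) -> Vec<(String, usize, usize)> {
    // Byte index of every char boundary, plus the end of the string.
    let mut boundaries: Vec<usize> = text.char_indices().map(|(i, _)| i).collect();
    boundaries.push(text.len());
    let n_chars = boundaries.len() - 1;

    let mut out = Vec::new();
    for len in min_gram..=max_gram.min(n_chars) {
        let (from, to) = (boundaries[0], boundaries[len]);
        out.push((text[from..to].to_string(), from, to));
    }
    out
}

fn main() {
    // Matches the (min_gram: 2, max_gram: 5, prefix_only: true) table:
    // hε 0,3 / hεl 0,4 / hεll 0,5 / hεllo 0,6
    for (gram, from, to) in prefix_ngrams("hεllo", 2, 5) {
        println!("{} {},{}", gram, from, to);
    }
}
```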
Example

```rust
use tantivy::tokenizer::*;

let tokenizer = NgramTokenizer::new(2, 3, false);
let mut stream = tokenizer.token_stream("hello");
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "he");
    assert_eq!(token.offset_from, 0);
    assert_eq!(token.offset_to, 2);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "hel");
    assert_eq!(token.offset_from, 0);
    assert_eq!(token.offset_to, 3);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "el");
    assert_eq!(token.offset_from, 1);
    assert_eq!(token.offset_to, 3);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "ell");
    assert_eq!(token.offset_from, 1);
    assert_eq!(token.offset_to, 4);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "ll");
    assert_eq!(token.offset_from, 2);
    assert_eq!(token.offset_to, 4);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "llo");
    assert_eq!(token.offset_from, 2);
    assert_eq!(token.offset_to, 5);
}
{
    let token = stream.next().unwrap();
    assert_eq!(token.text, "lo");
    assert_eq!(token.offset_from, 3);
    assert_eq!(token.offset_to, 5);
}
assert!(stream.next().is_none());
```
Implementations
Configures a new `NgramTokenizer`.

Creates an `NgramTokenizer` which generates tokens for all inner ngrams, as opposed to prefix ngrams only.

Creates an `NgramTokenizer` which generates tokens for the prefix ngrams only.
Trait Implementations
Creates a token stream for a given `&str`.
Auto Trait Implementations
impl RefUnwindSafe for NgramTokenizer
impl Send for NgramTokenizer
impl Sync for NgramTokenizer
impl Unpin for NgramTokenizer
impl UnwindSafe for NgramTokenizer
Blanket Implementations
Mutably borrows from an owned value.

Converts `Box<dyn Trait>` (where `Trait: Downcast`) to `Box<dyn Any>`, which can then be further downcast into `Box<ConcreteType>`, where `ConcreteType` implements `Trait`.

Converts `Rc<Trait>` (where `Trait: Downcast`) to `Rc<Any>`, which can then be further downcast into `Rc<ConcreteType>`, where `ConcreteType` implements `Trait`.

Converts `&Trait` (where `Trait: Downcast`) to `&Any`. This is needed since Rust cannot generate `&Any`'s vtable from `&Trait`'s.

Converts `&mut Trait` (where `Trait: Downcast`) to `&mut Any`. This is needed since Rust cannot generate `&mut Any`'s vtable from `&mut Trait`'s.