Enum NgramType

Source

pub enum NgramType {
    Window,
    Edge,
}

Expand description

The style of ngram extraction to use.

The same style of ngram extraction is always used at index time and at query time.

Each ngram type uses the ngram size configuration differently.

All ngram styles use Unicode codepoints as the definition of a character. For example, a 3-gram might contain up to 12 bytes, if it contains 3 Unicode codepoints that each require 4 UTF-8 code units.

Variants§

§

Window

A windowing ngram.

This is the traditional style of ngram, where a sliding window of size N is moved across the entire content to be indexed. For example, the 3-grams for the string homer are hom, ome and mer.

§

Edge

An edge ngram.

This style of ngram produces ever longer ngrams, where each ngram is anchored to the start of a word. Words are determined simply by splitting on whitespace.

For example, the edge ngrams of homer simpson, where the max ngram size is 5, would be: hom, home, homer, sim, simp, simps. Generally, for this ngram type, one wants to use a large maximum ngram size, perhaps somewhere close to the length (in characters) of the longest word in the corpus.

Note that there is no way to set the minimum ngram size (which is 3).

Enum NgramType Copy item path

Variants§

Window

Edge

Implementations§

impl NgramType

pub fn possible_names() -> &'static [&'static str]

pub fn as_str(&self) -> &'static str

Trait Implementations§

impl Clone for NgramType

fn clone(&self) -> NgramType

fn clone_from(&mut self, source: &Self)

impl Debug for NgramType

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for NgramType

fn default() -> NgramType

impl<'de> Deserialize<'de> for NgramType

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl Display for NgramType

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl FromStr for NgramType

type Err = Error

fn from_str(s: &str) -> Result<NgramType>

impl Hash for NgramType

fn hash<__H: Hasher>(&self, state: &mut __H)

fn hash_slice<H>(data: &[Self], state: &mut H) where H: Hasher, Self: Sized,

impl PartialEq for NgramType

fn eq(&self, other: &NgramType) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for NgramType

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl Copy for NgramType

impl Eq for NgramType

impl StructuralPartialEq for NgramType

Auto Trait Implementations§

impl Freeze for NgramType

impl RefUnwindSafe for NgramType

impl Send for NgramType

impl Sync for NgramType

impl Unpin for NgramType

impl UnwindSafe for NgramType

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T> ToString for T where T: Display + ?Sized,

fn to_string(&self) -> String

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Enum NgramType

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn hash<H: Hasher>(&self, state: &mut H)

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T> ToString for T
where T: Display + ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,