Struct nlprule::tokenizer::TokenizerOptions

pub struct TokenizerOptions {
    pub allow_errors: bool,
    pub retain_last: bool,
    pub use_compound_split_heuristic: bool,
    pub always_add_lower_tags: bool,
    pub ids: Vec<String>,
    pub ignore_ids: Vec<String>,
    pub known_failures: Vec<String>,
    pub extra_tags: Vec<String>,
    pub extra_split_chars: Vec<char>,
    pub extra_join_regexes: Vec<SerializeRegex>,
}

Options for a tokenizer.

Fields

allow_errors: bool

Whether to allow errors while constructing the tokenizer.

retain_last: bool

Whether to retain the last tag if disambiguation leads to an empty tag. This behavior is language-specific in LanguageTool (LT), so it has to be an option.

use_compound_split_heuristic: bool

Whether to use a heuristic to split potential compound words.

always_add_lower_tags: bool

Whether to always add tags for a lowercase version of the word when assigning part-of-speech tags.

ids: Vec<String>

Disambiguation Rule IDs to use in this tokenizer.

ignore_ids: Vec<String>

Disambiguation Rule IDs to ignore in this tokenizer.

known_failures: Vec<String>

Specific examples in the notation {id}:{example_index} which are known to fail.

extra_tags: Vec<String>

Part-of-speech tags which are used but are not in the tagger dictionary.

extra_split_chars: Vec<char>

Extra language-specific characters to split text on.

extra_join_regexes: Vec<SerializeRegex>

Extra language-specific regexes whose matches will not be split into multiple tokens.

Trait Implementations

impl Default for TokenizerOptions[src]

impl<'de> Deserialize<'de> for TokenizerOptions[src]

impl Serialize for TokenizerOptions[src]

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> DeserializeOwned for T where
    T: for<'de> Deserialize<'de>, 
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> Pointable for T

type Init = T

The type for initializers.

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.