Struct nlprule::tokenizer::TokenizerOptions [−][src]
Options for a tokenizer.
Fields
allow_errors: bool
Whether to allow errors while constructing the tokenizer.
retain_last: bool
Whether to retain the last tag if disambiguation leads to an empty tag. This behavior is language-specific in LanguageTool (LT), so it has to be an option.
use_compound_split_heuristic: bool
Whether to use a heuristic to split potential compound words.
always_add_lower_tags: bool
Whether to always add tags for a lowercase version of the word when assigning part-of-speech tags.
ids: Vec<String>
Disambiguation Rule IDs to use in this tokenizer.
ignore_ids: Vec<String>
Disambiguation Rule IDs to ignore in this tokenizer.
known_failures: Vec<String>
Specific examples, in the notation `{id}:{example_index}`, which are known to fail.
extra_tags: Vec<String>
Used part-of-speech tags which are not in the tagger dictionary.
extra_split_chars: Vec<char>
Extra language-specific characters to split text on.
extra_join_regexes: Vec<SerializeRegex>
Extra language-specific Regexes of which the matches will not be split into multiple tokens.
Trait Implementations
impl Default for TokenizerOptions
[src]
impl<'de> Deserialize<'de> for TokenizerOptions
[src]
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
impl Serialize for TokenizerOptions
[src]
Auto Trait Implementations
impl RefUnwindSafe for TokenizerOptions
[src]
impl Send for TokenizerOptions
[src]
impl Sync for TokenizerOptions
[src]
impl Unpin for TokenizerOptions
[src]
impl UnwindSafe for TokenizerOptions
[src]
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
[src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,