pub struct WordTokenizer {
pub lowercase: bool,
pub remove_punctuation: bool,
pub min_word_length: usize,
/* private fields */
}

Word-level tokenizer.
Fields§
§lowercase: bool
§remove_punctuation: bool
§min_word_length: usize
Implementations§
Source§impl WordTokenizer
impl WordTokenizer
pub fn new() -> Self
pub fn lowercase(self, lowercase: bool) -> Self
pub fn remove_punctuation(self, remove: bool) -> Self
pub fn min_word_length(self, length: usize) -> Self
pub fn tokenize(&self, text: &str) -> Vec<String>
pub fn fit(&mut self, texts: &[String])
pub fn texts_to_sequences(&self, texts: &[String]) -> Vec<Vec<usize>>
pub fn sequences_to_texts(&self, sequences: &[Vec<usize>]) -> Vec<String>
pub fn vocab_size(&self) -> usize
pub fn vocab(&self) -> Option<&HashMap<String, usize>>
Trait Implementations§
Auto Trait Implementations§
impl Freeze for WordTokenizer
impl RefUnwindSafe for WordTokenizer
impl Send for WordTokenizer
impl Sync for WordTokenizer
impl Unpin for WordTokenizer
impl UnwindSafe for WordTokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more