pub struct HashingVectorizer { /* private fields */ }
Stateless hashing vectorizer for streaming/large-scale text.
Maps tokens to feature indices using a hash function. Does not store a vocabulary, making it memory-efficient but irreversible.
§Examples
use aprender::text::vectorize::HashingVectorizer;
use aprender::text::tokenize::WhitespaceTokenizer;
let docs = vec!["hello world", "hello rust"];
let vectorizer = HashingVectorizer::new(1000)
.with_tokenizer(Box::new(WhitespaceTokenizer::new()));
let matrix = vectorizer.transform(&docs).expect("transform should succeed");
assert_eq!(matrix.n_rows(), 2);
assert_eq!(matrix.n_cols(), 1000);
Implementations§
impl HashingVectorizer
pub fn new(n_features: usize) -> Self
Create a new HashingVectorizer with the specified number of features.
pub fn with_tokenizer(self, tokenizer: Box<dyn Tokenizer>) -> Self
Set the tokenizer.
pub fn with_lowercase(self, lowercase: bool) -> Self
Enable or disable lowercasing.
pub fn with_ngram_range(self, min_n: usize, max_n: usize) -> Self
Set the n-gram range (min_n, max_n).
pub fn with_stop_words_english(self) -> Self
Use English stop words.
Auto Trait Implementations§
impl Freeze for HashingVectorizer
impl !RefUnwindSafe for HashingVectorizer
impl !Send for HashingVectorizer
impl !Sync for HashingVectorizer
impl Unpin for HashingVectorizer
impl !UnwindSafe for HashingVectorizer
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more