pub struct TfidfVectorizer { /* private fields */ }
Expand description
TF-IDF text vectorizer.
Combines count vectorization with IDF weighting and optional normalization. Produces a sparse CSR matrix.
§Example
ⓘ
use scry_learn::text::TfidfVectorizer;
let docs = ["the cat sat", "the dog sat", "the cat played"];
let mut tfidf = TfidfVectorizer::new();
let matrix = tfidf.fit_transform(&docs);
Implementations§
Source§impl TfidfVectorizer
impl TfidfVectorizer
Sourcepub fn ngram_range(self, min_n: usize, max_n: usize) -> Self
pub fn ngram_range(self, min_n: usize, max_n: usize) -> Self
Set n-gram range.
Sourcepub fn max_features(self, n: usize) -> Self
pub fn max_features(self, n: usize) -> Self
Limit vocabulary size.
Sourcepub fn sublinear_tf(self, enable: bool) -> Self
pub fn sublinear_tf(self, enable: bool) -> Self
Enable sublinear TF scaling: tf = 1 + log(tf).
Sourcepub fn smooth_idf(self, enable: bool) -> Self
pub fn smooth_idf(self, enable: bool) -> Self
Enable smooth IDF: adds 1 to document frequencies. Default: true (matches sklearn).
Sourcepub fn fit<S: AsRef<str>>(&mut self, documents: &[S])
pub fn fit<S: AsRef<str>>(&mut self, documents: &[S])
Learn vocabulary and IDF weights from documents.
Sourcepub fn transform<S: AsRef<str>>(&self, documents: &[S]) -> CsrMatrix
pub fn transform<S: AsRef<str>>(&self, documents: &[S]) -> CsrMatrix
Transform documents into a TF-IDF weighted sparse matrix.
Sourcepub fn fit_transform<S: AsRef<str>>(&mut self, documents: &[S]) -> CsrMatrix
pub fn fit_transform<S: AsRef<str>>(&mut self, documents: &[S]) -> CsrMatrix
Fit and transform in one step.
Sourcepub fn vocabulary(&self) -> &HashMap<String, usize>
pub fn vocabulary(&self) -> &HashMap<String, usize>
Return the underlying vocabulary.
Sourcepub fn get_feature_names(&self) -> Vec<String>
pub fn get_feature_names(&self) -> Vec<String>
Return feature names sorted by column index.
Sourcepub fn n_features(&self) -> usize
pub fn n_features(&self) -> usize
Number of features.
Trait Implementations§
Source§impl Clone for TfidfVectorizer
impl Clone for TfidfVectorizer
Source§fn clone(&self) -> TfidfVectorizer
fn clone(&self) -> TfidfVectorizer
Returns a duplicate of the value. Read more
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for TfidfVectorizer
impl Debug for TfidfVectorizer
Auto Trait Implementations§
impl Freeze for TfidfVectorizer
impl RefUnwindSafe for TfidfVectorizer
impl Send for TfidfVectorizer
impl Sync for TfidfVectorizer
impl Unpin for TfidfVectorizer
impl UnsafeUnpin for TfidfVectorizer
impl UnwindSafe for TfidfVectorizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more