pub struct TfIdfVectorizer { /* private fields */ }
Similar to CountVectorizer, but instead of just counting the term frequency of each vocabulary entry in each given document, it computes the term frequency times the inverse document frequency, thus giving more importance to entries that appear many times but only in some documents. The weight function can be adjusted by setting the appropriate method. This struct provides the same string processing customizations described in CountVectorizer.
Implementations
impl TfIdfVectorizer
pub fn convert_to_lowercase(self, convert_to_lowercase: bool) -> Self
If true, all documents used for fitting will be converted to lowercase.
pub fn split_regex(self, regex_str: &str) -> Self
Sets the regex expression used to split documents into tokens.
pub fn n_gram_range(self, min_n: usize, max_n: usize) -> Self
If set to (1, 1), single tokens will be candidate vocabulary entries; if (2, 2), then adjacent token pairs will be considered; if (1, 2), then both single tokens and adjacent token pairs will be considered, and so on. The definition of a token depends on the regex used for splitting the documents. min_n should not be greater than max_n.
pub fn normalize(self, normalize: bool) -> Self
If true, all characters in the documents used for fitting will be normalized according to Unicode’s NFKD normalization.
pub fn document_frequency(self, min_freq: f32, max_freq: f32) -> Self
Specifies the minimum and maximum (relative) document frequencies that each vocabulary entry must satisfy. min_freq and max_freq must lie in 0..=1, and min_freq should not be greater than max_freq.
pub fn stopwords<T: ToString>(self, stopwords: &[T]) -> Self
List of entries to be excluded from the generated vocabulary.
pub fn fit<T: ToString + Clone, D: Data<Elem = T>>(
    &self,
    x: &ArrayBase<D, Ix1>
) -> Result<FittedTfIdfVectorizer>
Learns a vocabulary from the texts in x, according to the specified attributes, and maps each vocabulary entry to an integer value, producing a FittedTfIdfVectorizer.
Returns an error if:
- one of the n_gram boundaries is set to zero, or the minimum value is greater than the maximum value
- the minimum document frequency is greater than one or greater than the maximum frequency, or the maximum frequency is smaller than zero
pub fn fit_vocabulary<T: ToString>(
    &self,
    words: &[T]
) -> Result<FittedTfIdfVectorizer>
Produces a FittedTfIdfVectorizer with the input vocabulary.
All struct attributes are ignored during fitting but will be used by the FittedTfIdfVectorizer to transform any text to be examined. As such, this will return an error in the same cases as the fit method.
pub fn fit_files<P: AsRef<Path>>( &self, input: &[P], encoding: EncodingRef, trap: DecoderTrap ) -> Result<FittedTfIdfVectorizer>
Trait Implementations
impl Clone for TfIdfVectorizer
fn clone(&self) -> TfIdfVectorizer
1.0.0 · fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.