Struct linfa_preprocessing::tf_idf_vectorization::TfIdfVectorizer[−][src]

pub struct TfIdfVectorizer { /* fields omitted */ }

Expand description

Similar to CountVectorizer, but instead of just counting the term frequency of each vocabulary entry in each given document, it computes the term frequency times the inverse document frequency. This gives more importance to entries that appear many times but in only some documents. The weight function can be adjusted by setting the appropriate method. This struct provides the same string-processing customizations described in CountVectorizer.

Struct linfa_preprocessing::tf_idf_vectorization::TfIdfVectorizer[−][src]

Implementations

impl TfIdfVectorizer

pub fn convert_to_lowercase(self, convert_to_lowercase: bool) -> Self

pub fn split_regex(self, regex_str: &str) -> Self

pub fn n_gram_range(self, min_n: usize, max_n: usize) -> Self

pub fn normalize(self, normalize: bool) -> Self

pub fn document_frequency(self, min_freq: f32, max_freq: f32) -> Self

pub fn stopwords<T: ToString>(self, stopwords: &[T]) -> Self

pub fn fit<T: ToString + Clone, D: Data<Elem = T>>( &self, x: &ArrayBase<D, Ix1>) -> Result<FittedTfIdfVectorizer>

pub fn fit_vocabulary<T: ToString>( &self, words: &[T]) -> Result<FittedTfIdfVectorizer>

pub fn fit_files<P: AsRef<Path>>( &self, input: &[P], encoding: EncodingRef, trap: DecoderTrap) -> Result<FittedTfIdfVectorizer>

Trait Implementations

impl Default for TfIdfVectorizer

fn default() -> Self

Auto Trait Implementations

impl !RefUnwindSafe for TfIdfVectorizer

impl Send for TfIdfVectorizer

impl !Sync for TfIdfVectorizer

impl Unpin for TfIdfVectorizer

impl UnwindSafe for TfIdfVectorizer

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

pub fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

pub fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

pub fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

pub fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

pub fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

pub fn vzip(self) -> V

pub fn fit<T: ToString + Clone, D: Data<Elem = T>>(
&self,
x: &ArrayBase<D, Ix1>
) -> Result<FittedTfIdfVectorizer>

pub fn fit_vocabulary<T: ToString>(
&self,
words: &[T]
) -> Result<FittedTfIdfVectorizer>

pub fn fit_files<P: AsRef<Path>>(
&self,
input: &[P],
encoding: EncodingRef,
trap: DecoderTrap
) -> Result<FittedTfIdfVectorizer>

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

impl<V, T> VZip<V> for T where
V: MultiLane<T>,