TFIDFVectorizer

Struct TFIDFVectorizer 

Source
pub struct TFIDFVectorizer<N = f16, K = String, E = DefaultTFIDFEngine>
where N: Num + Copy + Into<f64> + Send + Sync, E: TFIDFEngine<N> + Send + Sync, K: Clone + Send + Sync + Eq + Hash,
{ pub documents: IndexMap<KeyRc<K>, TFVector<N>>, pub term_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>, pub corpus_ref: Arc<Corpus>, pub idf_cache: IDFVector, /* private fields */ }

Fields§

§documents: IndexMap<KeyRc<K>, TFVector<N>>

Document’s TF Vector

§term_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>

TF Vector’s term dimension sample and reverse index

§corpus_ref: Arc<Corpus>

Corpus reference

§idf_cache: IDFVector

IDF Vector

Implementations§

Source§

impl<N, K, E> TFIDFVectorizer<N, K, E>
where N: Num + Copy + Serialize + Into<f64> + Send + Sync, K: Serialize + Clone + Send + Sync + Eq + Hash, E: TFIDFEngine<N> + Send + Sync,

Source

pub fn into_tfidf_data(self) -> TFIDFData<N, K, E>

Source§

impl<N, K, E> TFIDFVectorizer<N, K, E>
where K: Clone + Sync + Send + PartialEq + Eq + Hash, N: Num + Copy + Into<f64> + Send + Sync, E: TFIDFEngine<N> + Send + Sync,

Source

pub fn similarity( &mut self, algorithm: &SimilarityAlgorithm, freq: &TermFrequency, query: Option<&Query>, ) -> Hits<K>

High-level similarity search interface

Source

pub fn similarity_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, freq: &TermFrequency, filter_query: Option<&Query>, ) -> Hits<K>

High-level similarity search interface without IDF update check

Source

pub fn search( &mut self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>

High-level search interface

Source

pub fn search_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>

High-level search interface without IDF update check

Source§

impl<N, K, E> TFIDFVectorizer<N, K, E>
where N: Num + Copy + Into<f64> + Send + Sync, E: TFIDFEngine<N> + Send + Sync, K: Clone + Send + Sync + Eq + Hash,

Source

pub fn new(corpus_ref: Arc<Corpus>) -> Self

Create a new TFIDFVectorizer instance

Source

pub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)

Set the corpus reference and recalculate the IDF

Source

pub fn update_idf(&mut self)

Recalculate the IDF if the Corpus has changed

Source§

impl<N, K, E> TFIDFVectorizer<N, K, E>
where N: Num + Copy + Into<f64> + Send + Sync, E: TFIDFEngine<N> + Send + Sync, K: PartialEq + Clone + Send + Sync + Eq + Hash,

Source

pub fn add_doc(&mut self, key: K, doc: &TermFrequency)

Add a document. The immediately referenced Corpus is also updated.

Source

pub fn del_doc(&mut self, key: &K)
where K: PartialEq,

Source

pub fn get_tf(&self, key: &K) -> Option<&TFVector<N>>
where K: Eq + Hash,

Get TFVector by document ID

Source

pub fn get_tf_into_term_freq(&self, key: &K) -> Option<TermFrequency>

Get TermFrequency by document ID. If quantized, there may be some error. Words not included in the corpus are ignored.

Source

pub fn contains_doc(&self, key: &K) -> bool
where K: PartialEq,

Check if a document with the given ID exists

Source

pub fn contains_term(&self, term: &str) -> bool

Check if the term exists in the term dimension sample

Source

pub fn contains_terms_from_freq(&self, freq: &TermFrequency) -> bool

Check if all terms in the given TermFrequency exist in the term dimension sample

Source

pub fn doc_num(&self) -> usize

Trait Implementations§

Source§

impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
where N: Num + Copy + Into<f64> + Send + Sync + Clone, E: TFIDFEngine<N> + Send + Sync + Clone, K: Clone + Send + Sync + Eq + Hash + Clone,

Source§

fn clone(&self) -> TFIDFVectorizer<N, K, E>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<N, K, E> Debug for TFIDFVectorizer<N, K, E>
where N: Num + Copy + Into<f64> + Send + Sync + Debug, E: TFIDFEngine<N> + Send + Sync + Debug, K: Clone + Send + Sync + Eq + Hash + Debug,

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'de, N, K, E> Deserialize<'de> for TFIDFVectorizer<N, K, E>
where N: Num + Copy + Deserialize<'de> + Into<f64> + Send + Sync, K: Deserialize<'de> + Clone + Send + Sync + Eq + Hash, E: TFIDFEngine<N> + Send + Sync,

Source§

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

Deserialize TFIDFVectorizer. This struct contains references, so they are excluded from deserialization. Use TFIDFData for deserialization.

Source§

impl<N, K, E> Serialize for TFIDFVectorizer<N, K, E>
where N: Num + Copy + Serialize + Into<f64> + Send + Sync, K: Serialize + Clone + Send + Sync + Eq + Hash, E: TFIDFEngine<N>,

Source§

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

Serialize TFIDFVectorizer. This struct contains references, so they are excluded from serialization. Use TFIDFData for deserialization.

Auto Trait Implementations§

§

impl<N, K, E> Freeze for TFIDFVectorizer<N, K, E>

§

impl<N = f16, K = String, E = DefaultTFIDFEngine> !RefUnwindSafe for TFIDFVectorizer<N, K, E>

§

impl<N = f16, K = String, E = DefaultTFIDFEngine> !Send for TFIDFVectorizer<N, K, E>

§

impl<N = f16, K = String, E = DefaultTFIDFEngine> !Sync for TFIDFVectorizer<N, K, E>

§

impl<N, K, E> Unpin for TFIDFVectorizer<N, K, E>
where E: Unpin,

§

impl<N = f16, K = String, E = DefaultTFIDFEngine> !UnwindSafe for TFIDFVectorizer<N, K, E>

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,