pub struct TFIDFVectorizer<N = f32, K = String, E = DefaultTFIDFEngine>where
N: Num + Copy + Into<f64> + Send + Sync,
E: TFIDFEngine<N, K> + Send + Sync,
K: Clone + Send + Sync + Eq + Hash,{
pub documents: IndexMap<KeyRc<K>, TFVector<N>>,
pub token_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>,
pub corpus_ref: Arc<Corpus>,
pub idf_cache: IDFVector,
/* private fields */
}Fields§
§documents: IndexMap<KeyRc<K>, TFVector<N>>
Document’s TF Vector.
token_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>
TF Vector’s token dimension sample and reverse index.
corpus_ref: Arc<Corpus>
Corpus reference.
idf_cache: IDFVector
IDF Vector.
Implementations§
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn similarity(
&mut self,
freq: &TokenFrequency,
algorithm: &SimilarityAlgorithm,
) -> Hits<K>
pub fn similarity( &mut self, freq: &TokenFrequency, algorithm: &SimilarityAlgorithm, ) -> Hits<K>
Calculate similarity scores based on the query token frequency. Uses the specified similarity algorithm. Calls update_idf() first so that the latest IDF vector is used.
Sourcepub fn similarity_uncheck_idf(
&self,
freq: &TokenFrequency,
algorithm: &SimilarityAlgorithm,
) -> Hits<K>
pub fn similarity_uncheck_idf( &self, freq: &TokenFrequency, algorithm: &SimilarityAlgorithm, ) -> Hits<K>
Calculate similarity scores based on the query token frequency. Uses the specified similarity algorithm. Does not check the IDF vector (so it can be called with an immutable reference). Call update_idf() manually if needed.
pub fn similarity_full_scan( &mut self, freq: &TokenFrequency, algorithm: &SimilarityAlgorithm, ) -> Hits<K>
pub fn similarity_full_scan_uncheck_idf( &self, freq: &TokenFrequency, algorithm: &SimilarityAlgorithm, ) -> Hits<K>
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
pub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
Set the corpus reference and recalculate the IDF.
Sourcepub fn update_idf(&mut self)
pub fn update_idf(&mut self)
Recalculate the IDF if the Corpus has changed.
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn add_doc(&mut self, key: K, doc: &TokenFrequency)
pub fn add_doc(&mut self, key: K, doc: &TokenFrequency)
Add a document. The immediately referenced Corpus is also updated.
pub fn del_doc(&mut self, key: &K)where
K: PartialEq,
Sourcepub fn get_tf_into_token_freq(&self, key: &K) -> Option<TokenFrequency>
pub fn get_tf_into_token_freq(&self, key: &K) -> Option<TokenFrequency>
Get the TokenFrequency by document ID. If quantized, there may be some error. Words not included in the corpus are ignored.
Sourcepub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
pub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
Check if a document with the given ID exists
Sourcepub fn contains_token(&self, token: &str) -> bool
pub fn contains_token(&self, token: &str) -> bool
Check if the token exists in the token dimension sample
Sourcepub fn contains_tokens_from_freq(&self, freq: &TokenFrequency) -> bool
pub fn contains_tokens_from_freq(&self, freq: &TokenFrequency) -> bool
Check if all tokens in the given TokenFrequency exist in the token dimension sample
pub fn doc_num(&self) -> usize
Trait Implementations§
Source§impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
Source§fn clone(&self) -> TFIDFVectorizer<N, K, E>
fn clone(&self) -> TFIDFVectorizer<N, K, E>
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more