pub struct TFIDFVectorizer<N = f16, K = String, E = DefaultTFIDFEngine> where
N: Num + Copy + Into<f64> + Send + Sync,
E: TFIDFEngine<N> + Send + Sync,
K: Clone + Send + Sync + Eq + Hash, {
pub documents: IndexMap<KeyRc<K>, TFVector<N>>,
pub term_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>,
pub corpus_ref: Arc<Corpus>,
pub idf_cache: IDFVector,
/* private fields */
}
Fields§
documents: IndexMap<KeyRc<K>, TFVector<N>>
Document’s TF Vector
term_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>
TF Vector’s term dimension sample and reverse index
corpus_ref: Arc<Corpus>
Corpus reference
idf_cache: IDFVector
IDF Vector
Implementations§
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
pub fn into_tfidf_data(self) -> TFIDFData<N, K, E>
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn similarity(
&mut self,
algorithm: &SimilarityAlgorithm,
freq: &TermFrequency,
query: Option<&Query>,
) -> Hits<K>
pub fn similarity( &mut self, algorithm: &SimilarityAlgorithm, freq: &TermFrequency, query: Option<&Query>, ) -> Hits<K>
High-level similarity search interface
Sourcepub fn similarity_uncheck_idf(
&self,
algorithm: &SimilarityAlgorithm,
freq: &TermFrequency,
filter_query: Option<&Query>,
) -> Hits<K>
pub fn similarity_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, freq: &TermFrequency, filter_query: Option<&Query>, ) -> Hits<K>
High-level similarity search interface without IDF update check
Sourcepub fn search(
&mut self,
algorithm: &SimilarityAlgorithm,
query: Query,
) -> Hits<K>
pub fn search( &mut self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>
High-level search interface
Sourcepub fn search_uncheck_idf(
&self,
algorithm: &SimilarityAlgorithm,
query: Query,
) -> Hits<K>
pub fn search_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>
High-level search interface without IDF update check
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
pub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
Set the corpus reference and recalculate the IDF.
Sourcepub fn update_idf(&mut self)
pub fn update_idf(&mut self)
Recalculate the IDF if the Corpus has changed.
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn add_doc(&mut self, key: K, doc: &TermFrequency)
pub fn add_doc(&mut self, key: K, doc: &TermFrequency)
Add a document. The immediately referenced Corpus is also updated.
pub fn del_doc(&mut self, key: &K) where
K: PartialEq,
Sourcepub fn get_tf_into_term_freq(&self, key: &K) -> Option<TermFrequency>
pub fn get_tf_into_term_freq(&self, key: &K) -> Option<TermFrequency>
Get TermFrequency by document ID. If quantized, there may be some error. Words not included in the corpus are ignored.
Sourcepub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
pub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
Check if a document with the given ID exists
Sourcepub fn contains_term(&self, term: &str) -> bool
pub fn contains_term(&self, term: &str) -> bool
Check if the term exists in the term dimension sample
Sourcepub fn contains_terms_from_freq(&self, freq: &TermFrequency) -> bool
pub fn contains_terms_from_freq(&self, freq: &TermFrequency) -> bool
Check if all terms in the given TermFrequency exist in the term dimension sample
pub fn doc_num(&self) -> usize
Trait Implementations§
Source§impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
Source§fn clone(&self) -> TFIDFVectorizer<N, K, E>
fn clone(&self) -> TFIDFVectorizer<N, K, E>
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl<N, K, E> Debug for TFIDFVectorizer<N, K, E>
impl<N, K, E> Debug for TFIDFVectorizer<N, K, E>
Source§impl<'de, N, K, E> Deserialize<'de> for TFIDFVectorizer<N, K, E>
impl<'de, N, K, E> Deserialize<'de> for TFIDFVectorizer<N, K, E>
Source§fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where
D: Deserializer<'de>,
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where
D: Deserializer<'de>,
Deserialize TFIDFVectorizer.
This struct contains references, so they are excluded from deserialization.
Use TFIDFData for deserialization.