pub struct TFIDFVectorizer<N = f16, K = String, E = DefaultTFIDFEngine> where
N: Num + Copy + Into<f64> + Send + Sync,
E: TFIDFEngine<N, K> + Send + Sync,
K: Clone + Send + Sync + Eq + Hash,{
pub documents: IndexMap<KeyRc<K>, TFVector<N>>,
pub token_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>,
pub corpus_ref: Arc<Corpus>,
pub idf_cache: IDFVector,
/* private fields */
}
Fields§
§documents: IndexMap<KeyRc<K>, TFVector<N>>
Document’s TF vector.
token_dim_rev_index: IndexMap<Box<str>, Vec<KeyRc<K>>>
TF vector’s token dimension sample and reverse index.
corpus_ref: Arc<Corpus>
Corpus reference.
idf_cache: IDFVector
IDF vector.
Implementations§
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
pub fn into_tfidf_data(self) -> TFIDFData<N, K, E>
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
pub fn similarity( &mut self, algorithm: &SimilarityAlgorithm, freq: &TokenFrequency, query: Option<&Query>, ) -> Hits<K>
pub fn similarity_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, freq: &TokenFrequency, filter_query: Option<&Query>, ) -> Hits<K>
pub fn search( &mut self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>
pub fn search_uncheck_idf( &self, algorithm: &SimilarityAlgorithm, query: Query, ) -> Hits<K>
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
pub fn set_corpus_ref(&mut self, corpus_ref: Arc<Corpus>)
Set the corpus reference and recalculate the IDF.
Sourcepub fn update_idf(&mut self)
pub fn update_idf(&mut self)
Recalculate the IDF if the Corpus has changed.
Source§impl<N, K, E> TFIDFVectorizer<N, K, E>
impl<N, K, E> TFIDFVectorizer<N, K, E>
Sourcepub fn add_doc(&mut self, key: K, doc: &TokenFrequency)
pub fn add_doc(&mut self, key: K, doc: &TokenFrequency)
Add a document. The immediately referenced Corpus is also updated.
pub fn del_doc(&mut self, key: &K) where
K: PartialEq,
Sourcepub fn get_tf_into_token_freq(&self, key: &K) -> Option<TokenFrequency>
pub fn get_tf_into_token_freq(&self, key: &K) -> Option<TokenFrequency>
Get the TokenFrequency by document ID. If quantized, there may be some error. Words not included in the corpus are ignored.
Sourcepub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
pub fn contains_doc(&self, key: &K) -> bool where
K: PartialEq,
Check if a document with the given ID exists
Sourcepub fn contains_token(&self, token: &str) -> bool
pub fn contains_token(&self, token: &str) -> bool
Check if the token exists in the token dimension sample
Sourcepub fn contains_tokens_from_freq(&self, freq: &TokenFrequency) -> bool
pub fn contains_tokens_from_freq(&self, freq: &TokenFrequency) -> bool
Check if all tokens in the given TokenFrequency exist in the token dimension sample
pub fn doc_num(&self) -> usize
Trait Implementations§
Source§impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
impl<N, K, E> Clone for TFIDFVectorizer<N, K, E>
Source§fn clone(&self) -> TFIDFVectorizer<N, K, E>
fn clone(&self) -> TFIDFVectorizer<N, K, E>
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl<N, K, E> Debug for TFIDFVectorizer<N, K, E>
impl<N, K, E> Debug for TFIDFVectorizer<N, K, E>
Source§impl<'de, N, K, E> Deserialize<'de> for TFIDFVectorizer<N, K, E>
impl<'de, N, K, E> Deserialize<'de> for TFIDFVectorizer<N, K, E>
Source§fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where
D: Deserializer<'de>,
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where
D: Deserializer<'de>,
Deserialize TFIDFVectorizer.
This struct contains references, so they are excluded from deserialization.
Use TFIDFData for deserialization.
Source§impl<N, K, E> Serialize for TFIDFVectorizer<N, K, E>
impl<N, K, E> Serialize for TFIDFVectorizer<N, K, E>
Auto Trait Implementations§
impl<N, K, E> Freeze for TFIDFVectorizer<N, K, E>
impl<N = f16, K = String, E = DefaultTFIDFEngine> !RefUnwindSafe for TFIDFVectorizer<N, K, E>
impl<N = f16, K = String, E = DefaultTFIDFEngine> !Send for TFIDFVectorizer<N, K, E>
impl<N = f16, K = String, E = DefaultTFIDFEngine> !Sync for TFIDFVectorizer<N, K, E>
impl<N, K, E> Unpin for TFIDFVectorizer<N, K, E>
impl<N = f16, K = String, E = DefaultTFIDFEngine> !UnwindSafe for TFIDFVectorizer<N, K, E>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more