pub struct TFIDFVectorizer<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare>{
pub documents: Vec<TFVector<N, K>>,
pub token_dim_sample: Vec<String>,
pub token_dim_set: HashSet<String>,
pub corpus_ref: &'a Corpus,
pub idf: IDFVector<N>,
/* private fields */
}
Fields§
documents: Vec<TFVector<N, K>> — ドキュメントのTFベクトル
token_dim_sample: Vec<String> — TFベクトルのトークンの次元サンプル
token_dim_set: HashSet<String> — 高速存在判定用の語彙セット (token_dim_sample と常に同期)
corpus_ref: &'a Corpus — コーパスの参照
idf: IDFVector<N> — IDFベクトル
Implementations§
Source§impl<'a, N, K, E> TFIDFVectorizer<'a, N, K, E>
impl<'a, N, K, E> TFIDFVectorizer<'a, N, K, E>
Sourcepub fn similarity(&self, query: SimilarityQuery) -> Hits<K>
pub fn similarity(&self, query: SimilarityQuery) -> Hits<K>
Examples found in repository?
examples/basic.rs (line 17)
3fn main() {
4 let corpus = Corpus::new();
5 let mut freq1 = TokenFrequency::new();
6 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7 let mut freq2 = TokenFrequency::new();
8 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11 vectorizer.add_doc("doc1".to_string(), &freq1);
12 vectorizer.add_doc("doc2".to_string(), &freq2);
13
14 let mut query_tokens = TokenFrequency::new();
15 query_tokens.add_tokens(&["rust", "高速"]);
16 let query = SimilarityQuery::CosineSimilarity(query_tokens);
17 let mut result = vectorizer.similarity(query);
18 result.sort_by_score();
19
20 result.list.iter().for_each(|(k, s)| {
21 println!("doc: {}, score: {}", k, s);
22 });
23}
Source§impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
Sourcepub fn new(corpus_ref: &'a Corpus) -> Self
pub fn new(corpus_ref: &'a Corpus) -> Self
Create a new TFIDFVectorizer instance
Examples found in repository?
examples/basic.rs (line 10)
3fn main() {
4 let corpus = Corpus::new();
5 let mut freq1 = TokenFrequency::new();
6 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7 let mut freq2 = TokenFrequency::new();
8 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11 vectorizer.add_doc("doc1".to_string(), &freq1);
12 vectorizer.add_doc("doc2".to_string(), &freq2);
13
14 let mut query_tokens = TokenFrequency::new();
15 query_tokens.add_tokens(&["rust", "高速"]);
16 let query = SimilarityQuery::CosineSimilarity(query_tokens);
17 let mut result = vectorizer.similarity(query);
18 result.sort_by_score();
19
20 result.list.iter().for_each(|(k, s)| {
21 println!("doc: {}, score: {}", k, s);
22 });
23}
Sourcepub fn set_corpus_ref(&mut self, corpus_ref: &'a Corpus)
pub fn set_corpus_ref(&mut self, corpus_ref: &'a Corpus)
Corpusを指定する
Sourcepub fn update_idf(&mut self)
pub fn update_idf(&mut self)
Corpusに変更があればIDFを再計算する
Source§impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
Sourcepub fn add_doc(&mut self, doc_id: K, doc: &TokenFrequency)
pub fn add_doc(&mut self, doc_id: K, doc: &TokenFrequency)
ドキュメントを追加します。参照されているCorpusも即時に更新されます。
Examples found in repository?
examples/basic.rs (line 11)
3fn main() {
4 let corpus = Corpus::new();
5 let mut freq1 = TokenFrequency::new();
6 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7 let mut freq2 = TokenFrequency::new();
8 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11 vectorizer.add_doc("doc1".to_string(), &freq1);
12 vectorizer.add_doc("doc2".to_string(), &freq2);
13
14 let mut query_tokens = TokenFrequency::new();
15 query_tokens.add_tokens(&["rust", "高速"]);
16 let query = SimilarityQuery::CosineSimilarity(query_tokens);
17 let mut result = vectorizer.similarity(query);
18 result.sort_by_score();
19
20 result.list.iter().for_each(|(k, s)| {
21 println!("doc: {}, score: {}", k, s);
22 });
23}
Trait Implementations§
Source§impl<'a, N, K: Debug, E, C> Debug for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K: Debug, E, C> Debug for TFIDFVectorizer<'a, N, K, E, C>
Source§impl<'a, N, K, E, C> Serialize for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> Serialize for TFIDFVectorizer<'a, N, K, E, C>
Auto Trait Implementations§
impl<'a, N, K, E, C> Freeze for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare> !RefUnwindSafe for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> Send for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> Sync for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N, K, E, C> Unpin for TFIDFVectorizer<'a, N, K, E, C>
impl<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare> !UnwindSafe for TFIDFVectorizer<'a, N, K, E, C>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more