basic/
basic.rs

1use std::sync::Arc;
2
3use tf_idf_vectorizer::{Corpus, SimilarityAlgorithm, TFIDFVectorizer, TokenFrequency};
4
5fn main() {
6    // build corpus
7    let corpus = Arc::new(Corpus::new());
8
9    // add documents
10    let mut freq1 = TokenFrequency::new();
11    freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
12    let mut freq2 = TokenFrequency::new();
13    freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
14
15    // build query
16    let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(corpus);    
17    vectorizer.add_doc("doc1".to_string(), &freq1);
18    vectorizer.add_doc("doc2".to_string(), &freq2);
19    vectorizer.del_doc(&"doc1".to_string());
20    vectorizer.add_doc("doc3".to_string(), &freq1);
21
22    // similarity search
23    let mut query_tokens = TokenFrequency::new();
24    query_tokens.add_tokens(&["rust", "高速"]);
25    let algorithm = SimilarityAlgorithm::CosineSimilarity;
26    let mut result = vectorizer.similarity(&query_tokens, &algorithm);
27    result.sort_by_score();
28
29    // print result
30    result.list.iter().for_each(|(k, s, l)| {
31        println!("doc: {}, score: {}, length: {}", k, s, l);
32    });
33    // debug
34    println!("result count: {}", result.list.len());
35    println!("{:?}", vectorizer);
36}