basic/
basic.rs

1use tf_idf_vectorizer::{Corpus, SimilarityQuery, TFIDFVectorizer, TokenFrequency};
2
3fn main() {
4    let corpus = Corpus::new();
5    let mut freq1 = TokenFrequency::new();
6    freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7    let mut freq2 = TokenFrequency::new();
8    freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10    let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);    
11    vectorizer.add_doc("doc1".to_string(), &freq1);
12    vectorizer.add_doc("doc2".to_string(), &freq2);
13
14    let mut query_tokens = TokenFrequency::new();
15    query_tokens.add_tokens(&["rust", "高速"]);
16    let query = SimilarityQuery::CosineSimilarity(query_tokens);
17    let mut result = vectorizer.similarity(query);
18    result.sort_by_score();
19
20    result.list.iter().for_each(|(k, s)| {
21        println!("doc: {}, score: {}", k, s);
22    });    
23}