basic/
basic.rs

1use std::{sync::Arc, vec};
2
3use tf_idf_vectorizer::{Corpus, SimilarityAlgorithm, TFIDFVectorizer, TokenFrequency};
4
5fn main() {
6    // build corpus
7    let corpus = Arc::new(Corpus::new());
8
9    // add documents
10    let mut freq1 = TokenFrequency::new();
11    freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
12    let mut freq2 = TokenFrequency::new();
13    freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
14
15    // build query
16    let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(corpus);    
17    vectorizer.add_doc("doc1".to_string(), &freq1);
18    vectorizer.add_doc("doc2".to_string(), &freq2);
19
20    // similarity search
21    let mut query_tokens = TokenFrequency::new();
22    query_tokens.add_tokens(&["rust", "高速"]);
23    let algorithm = SimilarityAlgorithm::CosineSimilarity;
24    let mut result = vectorizer.similarity(&query_tokens, &algorithm);
25    result.sort_by_score();
26
27    // print result
28    result.list.iter().for_each(|(k, s, l)| {
29        println!("doc: {}, score: {}, length: {}", k, s, l);
30    });
31    // debug
32    println!("result count: {}", result.list.len());
33    println!("{:?}", vectorizer);
34}