1use std::sync::Arc;
2
3use tf_idf_vectorizer::{Corpus, SimilarityAlgorithm, TFIDFVectorizer, TokenFrequency};
4
5fn main() {
6 let corpus = Arc::new(Corpus::new());
8
9 let mut freq1 = TokenFrequency::new();
11 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
12 let mut freq2 = TokenFrequency::new();
13 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
14
15 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(corpus);
17 vectorizer.add_doc("doc1".to_string(), &freq1);
18 vectorizer.add_doc("doc2".to_string(), &freq2);
19
20 let mut query_tokens = TokenFrequency::new();
22 query_tokens.add_tokens(&["rust", "高速"]);
23 let algorithm = SimilarityAlgorithm::CosineSimilarity;
24 let mut result = vectorizer.similarity(&query_tokens, &algorithm);
25 result.sort_by_score();
26
27 result.list.iter().for_each(|(k, s, l)| {
29 println!("doc: {}, score: {}, length: {}", k, s, l);
30 });
31 println!("result count: {}", result.list.len());
33 println!("{:?}", vectorizer);
34}