1use std::sync::Arc;
2
3use tf_idf_vectorizer::{Corpus, SimilarityAlgorithm, TFIDFVectorizer, TokenFrequency};
4
5fn main() {
6 let corpus = Arc::new(Corpus::new());
8
9 let mut freq1 = TokenFrequency::new();
11 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
12 let mut freq2 = TokenFrequency::new();
13 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
14
15 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(corpus);
17 vectorizer.add_doc("doc1".to_string(), &freq1);
18 vectorizer.add_doc("doc2".to_string(), &freq2);
19 vectorizer.del_doc(&"doc1".to_string());
20 vectorizer.add_doc("doc3".to_string(), &freq1);
21
22 let mut query_tokens = TokenFrequency::new();
24 query_tokens.add_tokens(&["rust", "高速"]);
25 let algorithm = SimilarityAlgorithm::CosineSimilarity;
26 let mut result = vectorizer.similarity(&query_tokens, &algorithm);
27 result.sort_by_score();
28
29 result.list.iter().for_each(|(k, s, l)| {
31 println!("doc: {}, score: {}, length: {}", k, s, l);
32 });
33 println!("result count: {}", result.list.len());
35 println!("{:?}", vectorizer);
36}