pub struct TokenFrequency { /* private fields */ }
Expand description
TokenFrequency 構造体。tokenの出現頻度を管理するための構造体です。tokenの出現回数をカウントします。
§Examples
use vectorizer::token::TokenFrequency;
let mut token_freq = TokenFrequency::new();
token_freq.add_token("token1");
token_freq.add_token("token2");
token_freq.add_token("token1");
Implementations§
Source§impl TokenFrequency
Tokenの追加、削除の実装
impl TokenFrequency
Tokenの追加、削除の実装
Sourcepub fn new() -> Self
pub fn new() -> Self
新しいTokenFrequencyを作成するメソッド
Examples found in repository?
examples/basic.rs (line 5)
3fn main() {
4 let corpus = Corpus::new();
5 let mut freq1 = TokenFrequency::new();
6 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7 let mut freq2 = TokenFrequency::new();
8 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11 vectorizer.add_doc("doc1".to_string(), &freq1);
12 vectorizer.add_doc("doc2".to_string(), &freq2);
13
14 let mut query_tokens = TokenFrequency::new();
15 query_tokens.add_tokens(&["rust", "高速"]);
16 let query = SimilarityQuery::CosineSimilarity(query_tokens);
17 let mut result = vectorizer.similarity(query);
18 result.sort_by_score();
19
20 result.list.iter().for_each(|(k, s)| {
21 println!("doc: {}, score: {}", k, s);
22 });
23}
Source pub fn add_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
pub fn add_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
Examples found in repository?
examples/basic.rs (line 6)
3fn main() {
4 let corpus = Corpus::new();
5 let mut freq1 = TokenFrequency::new();
6 freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7 let mut freq2 = TokenFrequency::new();
8 freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10 let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11 vectorizer.add_doc("doc1".to_string(), &freq1);
12 vectorizer.add_doc("doc2".to_string(), &freq2);
13
14 let mut query_tokens = TokenFrequency::new();
15 query_tokens.add_tokens(&["rust", "高速"]);
16 let query = SimilarityQuery::CosineSimilarity(query_tokens);
17 let mut result = vectorizer.similarity(query);
18 result.sort_by_score();
19
20 result.list.iter().for_each(|(k, s)| {
21 println!("doc: {}, score: {}", k, s);
22 });
23}
Source pub fn sub_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
pub fn sub_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
Sourcepub fn set_token_count(&mut self, token: &str, count: u64) -> &mut Self
pub fn set_token_count(&mut self, token: &str, count: u64) -> &mut Self
Source§impl TokenFrequency
TokenFrequencyの情報を取得するための実装
impl TokenFrequency
TokenFrequencyの情報を取得するための実装
Sourcepub fn token_count_vector(&self) -> Vec<(String, u64)>
pub fn token_count_vector(&self) -> Vec<(String, u64)>
Sourcepub fn token_count_vector_ref_str(&self) -> Vec<(&str, u64)>
pub fn token_count_vector_ref_str(&self) -> Vec<(&str, u64)>
Sourcepub fn token_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
pub fn token_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
Sourcepub fn token_count(&self, token: &str) -> u64
pub fn token_count(&self, token: &str) -> u64
Sourcepub fn most_frequent_tokens_vector(&self) -> Vec<(String, u64)>
pub fn most_frequent_tokens_vector(&self) -> Vec<(String, u64)>
もっとも多く出現したtokenを取得します。同じ出現回数のtokenが複数ある場合は、すべてのtokenを取得します。
§Returns
Vec<(String, u64)> - トークンとその出現回数のベクタ
Sourcepub fn most_frequent_token_count(&self) -> u64
pub fn most_frequent_token_count(&self) -> u64
Sourcepub fn contains_token(&self, token: &str) -> bool
pub fn contains_token(&self, token: &str) -> bool
Sourcepub fn token_set_ref_str(&self) -> Vec<&str>
pub fn token_set_ref_str(&self) -> Vec<&str>
Sourcepub fn token_hashset(&self) -> HashSet<String, RandomState>
pub fn token_hashset(&self) -> HashSet<String, RandomState>
Sourcepub fn token_hashset_ref_str(&self) -> HashSet<&str, RandomState>
pub fn token_hashset_ref_str(&self) -> HashSet<&str, RandomState>
Sourcepub fn remove_stop_tokens(&mut self, stop_tokens: &[&str]) -> u64
pub fn remove_stop_tokens(&mut self, stop_tokens: &[&str]) -> u64
Sourcepub fn remove_tokens_by<F>(&mut self, condition: F) -> u64
pub fn remove_tokens_by<F>(&mut self, condition: F) -> u64
Sourcepub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
pub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
Sourcepub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
pub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
Sourcepub fn unique_token_ratio(&self) -> f64
pub fn unique_token_ratio(&self) -> f64
Sourcepub fn shrink_to_fit(&mut self)
pub fn shrink_to_fit(&mut self)
Shrinks the internal storage to fit the current size.
Trait Implementations§
Source§impl Clone for TokenFrequency
impl Clone for TokenFrequency
Source§fn clone(&self) -> TokenFrequency
fn clone(&self) -> TokenFrequency
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for TokenFrequency
impl Debug for TokenFrequency
Source§impl<'de> Deserialize<'de> for TokenFrequency
impl<'de> Deserialize<'de> for TokenFrequency
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for TokenFrequency
impl RefUnwindSafe for TokenFrequency
impl Send for TokenFrequency
impl Sync for TokenFrequency
impl Unpin for TokenFrequency
impl UnwindSafe for TokenFrequency
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more