pub struct TokenFrequency { /* private fields */ }
Expand description
TokenFrequency struct. Manages the frequency of token occurrences by counting the number of times each token appears.
§Examples
use crate::tf_idf_vectorizer::vectorizer::token::TokenFrequency;
let mut token_freq = TokenFrequency::new();
token_freq.add_token("token1");
token_freq.add_token("token2");
token_freq.add_token("token1");
assert_eq!(token_freq.token_count("token1"), 2);
Implementations§
Source§impl TokenFrequency
Implementation for adding and removing tokens
impl TokenFrequency
Implementation for adding and removing tokens
Sourcepub fn add_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
pub fn add_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
Sourcepub fn sub_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
pub fn sub_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
Sourcepub fn set_token_count(&mut self, token: &str, count: u64) -> &mut Self
pub fn set_token_count(&mut self, token: &str, count: u64) -> &mut Self
Sourcepub fn add_tokens_from_freq(&mut self, other: &TokenFrequency) -> &mut Self
pub fn add_tokens_from_freq(&mut self, other: &TokenFrequency) -> &mut Self
Source§impl TokenFrequency
Implementation for retrieving information from TokenFrequency
impl TokenFrequency
Implementation for retrieving information from TokenFrequency
Sourcepub fn iter(&self) -> impl Iterator<Item = (&str, u64)>
pub fn iter(&self) -> impl Iterator<Item = (&str, u64)>
Get iterator over all tokens and their counts
§Returns
impl Iterator<Item=(&str, u64)> - Iterator over tokens and their counts
Sourcepub fn token_count_vector(&self) -> Vec<(String, u64)>
pub fn token_count_vector(&self) -> Vec<(String, u64)>
Get a vector of all tokens and their counts
§Returns
Vec<(String, u64)> - Vector of tokens and their counts
Sourcepub fn token_count_vector_ref_str(&self) -> Vec<(&str, u64)>
pub fn token_count_vector_ref_str(&self) -> Vec<(&str, u64)>
Get a vector of all tokens and their counts (as &str)
§Returns
Vec<(&str, u64)> - Vector of tokens and their counts
Sourcepub fn token_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
pub fn token_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
Get a hashmap of all tokens and their counts (as &str)
§Returns
HashMap<&str, u64> - HashMap of tokens and their counts
Sourcepub fn token_count(&self, token: &str) -> u64
pub fn token_count(&self, token: &str) -> u64
Sourcepub fn most_frequent_tokens_vector(&self) -> Vec<(String, u64)>
pub fn most_frequent_tokens_vector(&self) -> Vec<(String, u64)>
Get the most frequent tokens. If multiple tokens are tied for the highest count, all of them are returned.
§Returns
Vec<(String, u64)> - Vector of most frequent tokens and their counts
Sourcepub fn most_frequent_token_count(&self) -> u64
pub fn most_frequent_token_count(&self) -> u64
Sourcepub fn contains_token(&self, token: &str) -> bool
pub fn contains_token(&self, token: &str) -> bool
Sourcepub fn token_set_iter(&self) -> impl Iterator<Item = &str>
pub fn token_set_iter(&self) -> impl Iterator<Item = &str>
Sourcepub fn token_set_ref_str(&self) -> Vec<&str>
pub fn token_set_ref_str(&self) -> Vec<&str>
Sourcepub fn token_hashset(&self) -> HashSet<String, RandomState>
pub fn token_hashset(&self) -> HashSet<String, RandomState>
Sourcepub fn token_hashset_ref_str(&self) -> HashSet<&str, RandomState>
pub fn token_hashset_ref_str(&self) -> HashSet<&str, RandomState>
Sourcepub fn remove_stop_tokens(&mut self, stop_tokens: &[&str]) -> u64
pub fn remove_stop_tokens(&mut self, stop_tokens: &[&str]) -> u64
Sourcepub fn remove_tokens_by<F>(&mut self, condition: F) -> u64
pub fn remove_tokens_by<F>(&mut self, condition: F) -> u64
Sourcepub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
pub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
Get a vector of tokens sorted by frequency (descending)
§Returns
Vec<(String, u64)> - Vector of tokens sorted by frequency
Sourcepub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
pub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
Get a vector of tokens sorted by dictionary order (ascending)
§Returns
Vec<(String, u64)> - Vector of tokens sorted by dictionary order
Sourcepub fn unique_token_ratio(&self) -> f64
pub fn unique_token_ratio(&self) -> f64
Calculate the diversity of tokens. 1.0 indicates complete diversity; 0.0 indicates no diversity.
§Returns
f64 - Diversity of tokens
Sourcepub fn probability_vector(&self) -> Vec<(String, f64)>
pub fn probability_vector(&self) -> Vec<(String, f64)>
Get the probability distribution P(token) (owned String version). Returns an empty vector if total is 0.
Sourcepub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>
pub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>
Get the probability distribution P(token) (as &str). Returns an empty vector if total is 0.
Sourcepub fn probability(&self, token: &str) -> f64
pub fn probability(&self, token: &str) -> f64
Get the probability P(token) for a specific token. Returns 0.0 if total is 0.
Sourcepub fn shrink_to_fit(&mut self)
pub fn shrink_to_fit(&mut self)
Shrink internal storage to fit current size
Trait Implementations§
Source§impl Clone for TokenFrequency
impl Clone for TokenFrequency
Source§fn clone(&self) -> TokenFrequency
fn clone(&self) -> TokenFrequency
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more