pub struct TermFrequency { /* private fields */ }
Expand description
TermFrequency struct. Manages the frequency of term occurrences. Counts the number of times each term appears.
§Examples
use crate::tf_idf_vectorizer::vectorizer::term::TermFrequency;
let mut term_freq = TermFrequency::new();
term_freq.add_term("term1");
term_freq.add_term("term2");
term_freq.add_term("term1");
assert_eq!(term_freq.term_count("term1"), 2);
Implementations§
Source§impl TermFrequency
Implementation for adding and removing terms
impl TermFrequency
Implementation for adding and removing terms
Sourcepub fn set_term_count(&mut self, term: &str, count: u64) -> &mut Self
pub fn set_term_count(&mut self, term: &str, count: u64) -> &mut Self
Sourcepub fn add_terms_from_freq(&mut self, other: &TermFrequency) -> &mut Self
pub fn add_terms_from_freq(&mut self, other: &TermFrequency) -> &mut Self
Source§impl TermFrequency
Implementation for retrieving information from TermFrequency
impl TermFrequency
Implementation for retrieving information from TermFrequency
Sourcepub fn iter(&self) -> impl Iterator<Item = (&str, u64)>
pub fn iter(&self) -> impl Iterator<Item = (&str, u64)>
Get iterator over all terms and their counts
§Returns
impl Iterator<Item=(&str, u64)> - Iterator over terms and their counts
Sourcepub fn term_count_vector(&self) -> Vec<(String, u64)>
pub fn term_count_vector(&self) -> Vec<(String, u64)>
Get a vector of all terms and their counts
§Returns
Vec<(String, u64)> - Vector of terms and their counts
Sourcepub fn term_count_vector_ref_str(&self) -> Vec<(&str, u64)>
pub fn term_count_vector_ref_str(&self) -> Vec<(&str, u64)>
Get a vector of all terms and their counts (as &str)
§Returns
Vec<(&str, u64)> - Vector of terms and their counts
Sourcepub fn term_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
pub fn term_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>
Get a hashmap of all terms and their counts (as &str)
§Returns
HashMap<&str, u64> - HashMap of terms and their counts
Sourcepub fn term_count(&self, term: &str) -> u64
pub fn term_count(&self, term: &str) -> u64
Sourcepub fn most_frequent_terms_vector(&self) -> Vec<(String, u64)>
pub fn most_frequent_terms_vector(&self) -> Vec<(String, u64)>
Get the most frequent terms. If multiple terms have the same count, all are returned.
§Returns
Vec<(String, u64)> - Vector of most frequent terms and their counts
Sourcepub fn most_frequent_term_count(&self) -> u64
pub fn most_frequent_term_count(&self) -> u64
Sourcepub fn contains_term(&self, term: &str) -> bool
pub fn contains_term(&self, term: &str) -> bool
Sourcepub fn term_set_iter(&self) -> impl Iterator<Item = &str>
pub fn term_set_iter(&self) -> impl Iterator<Item = &str>
Sourcepub fn term_set_ref_str(&self) -> Vec<&str>
pub fn term_set_ref_str(&self) -> Vec<&str>
Sourcepub fn term_hashset(&self) -> HashSet<String, RandomState>
pub fn term_hashset(&self) -> HashSet<String, RandomState>
Sourcepub fn term_hashset_ref_str(&self) -> HashSet<&str, RandomState>
pub fn term_hashset_ref_str(&self) -> HashSet<&str, RandomState>
Sourcepub fn remove_stop_terms(&mut self, stop_terms: &[&str]) -> u64
pub fn remove_stop_terms(&mut self, stop_terms: &[&str]) -> u64
Sourcepub fn remove_terms_by<F>(&mut self, condition: F) -> u64
pub fn remove_terms_by<F>(&mut self, condition: F) -> u64
Sourcepub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
pub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>
Get a vector of terms sorted by frequency (descending)
§Returns
Vec<(String, u64)> - Vector of terms sorted by frequency
Sourcepub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
pub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>
Get a vector of terms sorted by dictionary order (ascending)
§Returns
Vec<(String, u64)> - Vector of terms sorted by dictionary order
Sourcepub fn unique_term_ratio(&self) -> f64
pub fn unique_term_ratio(&self) -> f64
Calculate the diversity of terms. 1.0 indicates complete diversity, 0.0 indicates no diversity.
§Returns
f64 - Diversity of terms
Sourcepub fn probability_vector(&self) -> Vec<(String, f64)>
pub fn probability_vector(&self) -> Vec<(String, f64)>
Get the probability distribution P(term) (owned String version). Returns an empty vector if total is 0.
Sourcepub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>
pub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>
Get the probability distribution P(term) (as &str). Returns an empty vector if total is 0.
Sourcepub fn probability(&self, term: &str) -> f64
pub fn probability(&self, term: &str) -> f64
Get the probability P(term) for a specific term. Returns 0.0 if total is 0.
Sourcepub fn shrink_to_fit(&mut self)
pub fn shrink_to_fit(&mut self)
Shrink internal storage to fit current size
Trait Implementations§
Source§impl Clone for TermFrequency
impl Clone for TermFrequency
Source§fn clone(&self) -> TermFrequency
fn clone(&self) -> TermFrequency
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more