TermFrequency

Struct TermFrequency 

Source
pub struct TermFrequency { /* private fields */ }
Expand description

TermFrequency struct. Manages the frequency of term occurrences by counting the number of times each term appears.

§Examples

use crate::tf_idf_vectorizer::vectorizer::term::TermFrequency;
let mut term_freq = TermFrequency::new();
term_freq.add_term("term1");
term_freq.add_term("term2");
term_freq.add_term("term1");

assert_eq!(term_freq.term_count("term1"), 2);

Implementations§

Source§

impl TermFrequency

Implementation for adding and removing terms

Source

pub fn new() -> Self

Create a new TermFrequency

Source

pub fn add_term(&mut self, term: &str) -> &mut Self

Add a term

§Arguments
  • term - term to add
Source

pub fn add_terms<T>(&mut self, terms: &[T]) -> &mut Self
where T: AsRef<str>,

Add multiple terms

§Arguments
  • terms - Slice of terms to add
Source

pub fn sub_term(&mut self, term: &str) -> &mut Self

Subtract a term

§Arguments
  • term - term to subtract
Source

pub fn sub_terms<T>(&mut self, terms: &[T]) -> &mut Self
where T: AsRef<str>,

Subtract multiple terms

§Arguments
  • terms - Slice of terms to subtract
Source

pub fn set_term_count(&mut self, term: &str, count: u64) -> &mut Self

Set the occurrence count for a term

§Arguments
  • term - term
  • count - Occurrence count
Source

pub fn add_terms_from_freq(&mut self, other: &TermFrequency) -> &mut Self

Merge with another TermFrequency

§Arguments
  • other - Another TermFrequency to merge with
Source

pub fn scale(&mut self, scalar: f64) -> &mut Self

Scale the term counts by a scalar

§Arguments
  • scalar - Scalar to scale by
Source§

impl TermFrequency

Implementation for retrieving information from TermFrequency

Source

pub fn iter(&self) -> impl Iterator<Item = (&str, u64)>

Get iterator over all terms and their counts

§Returns
  • impl Iterator<Item=(&str, u64)> - Iterator over terms and their counts
Source

pub fn term_count_vector(&self) -> Vec<(String, u64)>

Get a vector of all terms and their counts

§Returns
  • Vec<(String, u64)> - Vector of terms and their counts
Source

pub fn term_count_vector_ref_str(&self) -> Vec<(&str, u64)>

Get a vector of all terms and their counts (as &str)

§Returns
  • Vec<(&str, u64)> - Vector of terms and their counts
Source

pub fn term_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>

Get a hashmap of all terms and their counts (as &str)

§Returns
  • HashMap<&str, u64> - HashMap of terms and their counts
Source

pub fn term_sum(&self) -> u64

Get the total count of all terms

§Returns
  • u64 - Total term count
Source

pub fn term_count(&self, term: &str) -> u64

Get the occurrence count for a specific term

§Arguments
  • term - term
§Returns
  • u64 - Occurrence count for the term
Source

pub fn most_frequent_terms_vector(&self) -> Vec<(String, u64)>

Get the most frequent terms. If multiple terms share the same highest count, all of them are returned.

§Returns
  • Vec<(String, u64)> - Vector of most frequent terms and their counts
Source

pub fn most_frequent_term_count(&self) -> u64

Get the count of the most frequent term

§Returns
  • u64 - Count of the most frequent term
Source

pub fn contains_term(&self, term: &str) -> bool

Check if a term exists

§Arguments
  • term - term
§Returns
  • bool - true if the term exists, false otherwise
Source

pub fn term_set_iter(&self) -> impl Iterator<Item = &str>

Get an iterator over the set of terms

§Returns
  • impl Iterator<Item=&str> - Iterator over the set of terms
Source

pub fn term_set(&self) -> Vec<String>

Get the set of terms

§Returns
  • Vec<String> - Set of terms
Source

pub fn term_set_ref_str(&self) -> Vec<&str>

Get the set of terms (as &str)

§Returns
  • Vec<&str> - Set of terms
Source

pub fn term_hashset(&self) -> HashSet<String, RandomState>

Get the set of terms as a HashSet

§Returns
  • HashSet<String> - Set of terms
Source

pub fn term_hashset_ref_str(&self) -> HashSet<&str, RandomState>

Get the set of terms as a HashSet (as &str)

§Returns
  • HashSet<&str> - Set of terms
Source

pub fn term_num(&self) -> usize

Get the number of unique terms

§Returns
  • usize - Number of unique terms
Source

pub fn remove_stop_terms(&mut self, stop_terms: &[&str]) -> u64

Remove stop terms

§Arguments
  • stop_terms - Slice of stop terms to remove
§Returns
  • u64 - Total count of removed terms
Source

pub fn remove_terms_by<F>(&mut self, condition: F) -> u64
where F: Fn(&str, &u64) -> bool,

Remove terms by a condition

§Arguments
  • condition - Closure to determine which terms to remove
§Returns
  • u64 - Total count of removed terms
Source

pub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>

Get a vector of terms sorted by frequency (descending)

§Returns
  • Vec<(String, u64)> - Vector of terms sorted by frequency
Source

pub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>

Get a vector of terms sorted by dictionary order (ascending)

§Returns
  • Vec<(String, u64)> - Vector of terms sorted by dictionary order
Source

pub fn unique_term_ratio(&self) -> f64

Calculate the diversity of terms. 1.0 indicates complete diversity; 0.0 indicates no diversity.

§Returns
  • f64 - Diversity of terms
Source

pub fn probability_vector(&self) -> Vec<(String, f64)>

Get the probability distribution P(term) (owned String version). Returns an empty vector if the total count is 0.

Source

pub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>

Get the probability distribution P(term) (as &str). Returns an empty vector if the total count is 0.

Source

pub fn probability(&self, term: &str) -> f64

Get the probability P(term) for a specific term. Returns 0.0 if the total count is 0.

Source

pub fn clear(&mut self)

Reset all counts

Source

pub fn shrink_to_fit(&mut self)

Shrink internal storage to fit current size

Trait Implementations§

Source§

impl Clone for TermFrequency

Source§

fn clone(&self) -> TermFrequency

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for TermFrequency

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'de> Deserialize<'de> for TermFrequency

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl<T> From<&[T]> for TermFrequency
where T: AsRef<str>,

Source§

fn from(terms: &[T]) -> Self

Converts to this type from the input type.
Source§

impl From<Corpus> for TermFrequency

Source§

fn from(corpus: Corpus) -> Self

Converts to this type from the input type.
Source§

impl Into<TermFrequency> for &Corpus

Source§

fn into(self) -> TermFrequency

Converts this type into the (usually inferred) input type.
Source§

impl Serialize for TermFrequency

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,