TokenFrequency

Struct TokenFrequency 

Source
pub struct TokenFrequency { /* private fields */ }
Expand description

The TokenFrequency struct manages the frequency of token occurrences. It counts the number of times each token appears.

§Examples

use crate::tf_idf_vectorizer::vectorizer::token::TokenFrequency;
let mut token_freq = TokenFrequency::new();
token_freq.add_token("token1");
token_freq.add_token("token2");
token_freq.add_token("token1");

assert_eq!(token_freq.token_count("token1"), 2);

Implementations§

Source§

impl TokenFrequency

Implementation for adding and removing tokens

Source

pub fn new() -> Self

Create a new TokenFrequency

Source

pub fn add_token(&mut self, token: &str) -> &mut Self

Add a token

§Arguments
  • token - Token to add
Source

pub fn add_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
where T: AsRef<str>,

Add multiple tokens

§Arguments
  • tokens - Slice of tokens to add
Source

pub fn sub_token(&mut self, token: &str) -> &mut Self

Subtract a token

§Arguments
  • token - Token to subtract
Source

pub fn sub_tokens<T>(&mut self, tokens: &[T]) -> &mut Self
where T: AsRef<str>,

Subtract multiple tokens

§Arguments
  • tokens - Slice of tokens to subtract
Source

pub fn set_token_count(&mut self, token: &str, count: u64) -> &mut Self

Set the occurrence count for a token

§Arguments
  • token - Token
  • count - Occurrence count
Source

pub fn add_tokens_from_freq(&mut self, other: &TokenFrequency) -> &mut Self

Merge with another TokenFrequency

§Arguments
  • other - Another TokenFrequency to merge with
Source

pub fn scale(&mut self, scalar: f64) -> &mut Self

Scale the token counts by a scalar

§Arguments
  • scalar - Scalar to scale by
Source§

impl TokenFrequency

Implementation for retrieving information from TokenFrequency

Source

pub fn iter(&self) -> impl Iterator<Item = (&str, u64)>

Get iterator over all tokens and their counts

§Returns
  • impl Iterator<Item=(&str, u64)> - Iterator over tokens and their counts
Source

pub fn token_count_vector(&self) -> Vec<(String, u64)>

Get a vector of all tokens and their counts

§Returns
  • Vec<(String, u64)> - Vector of tokens and their counts
Source

pub fn token_count_vector_ref_str(&self) -> Vec<(&str, u64)>

Get a vector of all tokens and their counts (as &str)

§Returns
  • Vec<(&str, u64)> - Vector of tokens and their counts
Source

pub fn token_count_hashmap_ref_str(&self) -> HashMap<&str, u64, RandomState>

Get a hashmap of all tokens and their counts (as &str)

§Returns
  • HashMap<&str, u64> - HashMap of tokens and their counts
Source

pub fn token_sum(&self) -> u64

Get the total count of all tokens

§Returns
  • u64 - Total token count
Source

pub fn token_count(&self, token: &str) -> u64

Get the occurrence count for a specific token

§Arguments
  • token - Token
§Returns
  • u64 - Occurrence count for the token
Source

pub fn most_frequent_tokens_vector(&self) -> Vec<(String, u64)>

Get the most frequent tokens. If multiple tokens have the same count, all are returned.

§Returns
  • Vec<(String, u64)> - Vector of most frequent tokens and their counts
Source

pub fn most_frequent_token_count(&self) -> u64

Get the count of the most frequent token

§Returns
  • u64 - Count of the most frequent token
Source

pub fn contains_token(&self, token: &str) -> bool

Check if a token exists

§Arguments
  • token - Token
§Returns
  • bool - true if the token exists, false otherwise
Source

pub fn token_set_iter(&self) -> impl Iterator<Item = &str>

Get an iterator over the set of tokens

§Returns
  • impl Iterator<Item=&str> - Iterator over the set of tokens
Source

pub fn token_set(&self) -> Vec<String>

Get the set of tokens

§Returns
  • Vec<String> - Set of tokens
Source

pub fn token_set_ref_str(&self) -> Vec<&str>

Get the set of tokens (as &str)

§Returns
  • Vec<&str> - Set of tokens
Source

pub fn token_hashset(&self) -> HashSet<String, RandomState>

Get the set of tokens as a HashSet

§Returns
  • HashSet<String> - Set of tokens
Source

pub fn token_hashset_ref_str(&self) -> HashSet<&str, RandomState>

Get the set of tokens as a HashSet (as &str)

§Returns
  • HashSet<&str> - Set of tokens
Source

pub fn token_num(&self) -> usize

Get the number of unique tokens

§Returns
  • usize - Number of unique tokens
Source

pub fn remove_stop_tokens(&mut self, stop_tokens: &[&str]) -> u64

Remove stop tokens

§Arguments
  • stop_tokens - Slice of stop tokens to remove
§Returns
  • u64 - Total count of removed tokens
Source

pub fn remove_tokens_by<F>(&mut self, condition: F) -> u64
where F: Fn(&str, &u64) -> bool,

Remove tokens by a condition

§Arguments
  • condition - Closure to determine which tokens to remove
§Returns
  • u64 - Total count of removed tokens
Source

pub fn sorted_frequency_vector(&self) -> Vec<(String, u64)>

Get a vector of tokens sorted by frequency (descending)

§Returns
  • Vec<(String, u64)> - Vector of tokens sorted by frequency
Source

pub fn sorted_dict_order_vector(&self) -> Vec<(String, u64)>

Get a vector of tokens sorted by dictionary order (ascending)

§Returns
  • Vec<(String, u64)> - Vector of tokens sorted by dictionary order
Source

pub fn unique_token_ratio(&self) -> f64

Calculate the diversity of tokens. 1.0 indicates complete diversity, 0.0 indicates no diversity.

§Returns
  • f64 - Diversity of tokens
Source

pub fn probability_vector(&self) -> Vec<(String, f64)>

Get the probability distribution P(token) (owned String version). Returns an empty vector if total is 0.

Source

pub fn probability_vector_ref_str(&self) -> Vec<(&str, f64)>

Get the probability distribution P(token) (as &str). Returns an empty vector if total is 0.

Source

pub fn probability(&self, token: &str) -> f64

Get the probability P(token) for a specific token. Returns 0.0 if total is 0.

Source

pub fn clear(&mut self)

Reset all counts

Source

pub fn shrink_to_fit(&mut self)

Shrink internal storage to fit current size

Trait Implementations§

Source§

impl Clone for TokenFrequency

Source§

fn clone(&self) -> TokenFrequency

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for TokenFrequency

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'de> Deserialize<'de> for TokenFrequency

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl<T> From<&[T]> for TokenFrequency
where T: AsRef<str>,

Source§

fn from(tokens: &[T]) -> Self

Converts to this type from the input type.
Source§

impl From<Corpus> for TokenFrequency

Source§

fn from(corpus: Corpus) -> Self

Converts to this type from the input type.
Source§

impl Into<TokenFrequency> for &Corpus

Source§

fn into(self) -> TokenFrequency

Converts this type into the (usually inferred) input type.
Source§

impl Serialize for TokenFrequency

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,