pub struct TokenFrequency {
pub token_count: HashMap<String, u32>,
pub total_token_count: u64,
}
Expand description
TokenFrequency 構造体
Fields§
§token_count: HashMap<String, u32>
§total_token_count: u64
Implementations§
Source§impl TokenFrequency
impl TokenFrequency
pub fn new() -> Self
pub fn add_token(&mut self, token: &str) -> &mut Self
pub fn add_token_n(&mut self, token: &str, n: u32) -> &mut Self
pub fn add_tokens(&mut self, tokens: &[&str]) -> &mut Self
pub fn add_tokens_string(&mut self, tokens: &[String]) -> &mut Self
pub fn sub_token(&mut self, token: &str) -> &mut Self
pub fn sub_token_n(&mut self, token: &str, n: u32) -> &mut Self
pub fn sub_tokens(&mut self, tokens: &[&str]) -> &mut Self
pub fn sub_tokens_string(&mut self, tokens: &[String]) -> &mut Self
pub fn tf_calc(max_count: u32, count: u32) -> f64
pub fn tf_calc_as_u16(max_count: u32, count: u32) -> u16
pub fn tf_calc_as_u32(max_count: u32, count: u32) -> u32
pub fn get_tf_vector(&self) -> Vec<(String, u16)>
pub fn get_tf_vector_parallel(&self) -> Vec<(String, u16)>
pub fn get_tf_vector_ref(&self) -> Vec<(&str, u16)>
pub fn get_tf_vector_ref_parallel(&self) -> Vec<(&str, u16)>
pub fn get_tf_hashmap(&self) -> HashMap<String, u16>
pub fn get_tf_hashmap_parallel(&self) -> HashMap<String, u16>
pub fn get_tf_hashmap_ref(&self) -> HashMap<&str, u16>
pub fn get_tf_hashmap_ref_parallel(&self) -> HashMap<&str, u16>
pub fn get_token_tf(&self, token: &str) -> u16
pub fn idf_max(&self, total_doc_count: u64) -> f64
pub fn idf_calc(total_doc_count: u64, max_idf: f64, doc_count: u32) -> f64
pub fn idf_calc_as_u16( total_doc_count: u64, max_idf: f64, doc_count: u32, ) -> u16
pub fn idf_calc_as_u32( total_doc_count: u64, max_idf: f64, doc_count: u32, ) -> u32
pub fn get_idf_vector(&self, total_doc_count: u64) -> Vec<(String, u16)>
pub fn get_idf_vector_ref(&self, total_doc_count: u64) -> Vec<(&str, u16)>
pub fn get_idf_vector_parallel( &self, total_doc_count: u64, ) -> Vec<(String, u16)>
pub fn get_idf_vector_ref_parallel( &self, total_doc_count: u64, ) -> Vec<(&str, u16)>
pub fn get_idf_hashmap(&self, total_doc_count: u64) -> HashMap<String, u16>
pub fn get_idf_hashmap_ref(&self, total_doc_count: u64) -> HashMap<&str, u16>
pub fn get_idf_hashmap_parallel( &self, total_doc_count: u64, ) -> HashMap<String, u16>
pub fn get_idf_hashmap_ref_parallel( &self, total_doc_count: u64, ) -> HashMap<&str, u16>
pub fn get_token_count_vector(&self) -> Vec<(String, u32)>
pub fn get_token_count_hashmap(&self) -> HashMap<String, u32>
pub fn get_token_count_hashmap_ref(&self) -> HashMap<&str, u32>
pub fn get_total_token_count(&self) -> u64
pub fn get_total_token_count_ref(&self) -> &u64
pub fn get_token_count(&self, token: &str) -> u32
pub fn get_token_count_ref(&self, token: &str) -> &u32
pub fn get_most_frequent_tokens(&self) -> Vec<(String, u32)>
pub fn get_most_frequent_token_count(&self) -> u32
pub fn get_most_frequent_tokens_parallel(&self) -> Vec<(String, u32)>
pub fn tfidf_calc(tf: f64, idf: f64) -> f64
pub fn tfidf_calc_as_u16(tf: u16, idf: u16) -> u16
pub fn tfidf_calc_as_u32(tf: u32, idf: u32) -> u32
pub fn get_tfidf_vector( &self, idf_map: &HashMap<String, u16>, ) -> Vec<(String, u16)>
pub fn get_tfidf_vector_fst(&self, idf_map: &Map<Vec<u8>>) -> Vec<(String, u16)>
pub fn get_tfidf_hashmap( &self, idf_map: &HashMap<String, u16>, ) -> HashMap<String, u16>
pub fn get_tfidf_hashmap_fst( &self, idf_map: &Map<Vec<u8>>, ) -> HashMap<String, u16>
pub fn get_tfidf_vector_parallel( &self, idf_map: &HashMap<String, u16>, ) -> Vec<(String, u16)>
pub fn get_tfidf_vector_fst_parallel( &self, idf_map: &Map<Vec<u8>>, ) -> Vec<(String, u16)>
pub fn get_tfidf_hashmap_parallel( &self, idf_map: &HashMap<String, u16>, ) -> HashMap<String, u16>
pub fn get_tfidf_hashmap_fst_parallel( &self, idf_map: &Map<Vec<u8>>, ) -> HashMap<String, u16>
pub fn contains_token(&self, token: &str) -> bool
pub fn get_token_set(&self) -> Vec<String>
pub fn get_token_set_ref(&self) -> Vec<&str>
pub fn get_token_hashset(&self) -> HashSet<String>
pub fn get_token_hashset_ref(&self) -> HashSet<&str>
pub fn get_token_set_len(&self) -> usize
pub fn get_token_set_iter(&self) -> Keys<'_, String, u32>
pub fn get_token_set_iter_ref(&self) -> impl Iterator<Item = &str>
pub fn get_token_length_stats(&self) -> Option<(usize, usize, f64)>
pub fn get_token_length_stats_ref(&self) -> Option<(usize, usize, f64)>
pub fn get_token_length_stats_parallel(&self) -> Option<(usize, usize, f64)>
pub fn remove_stop_tokens(&mut self, stop_tokens: &[&str])
pub fn remove_stop_tokens_parallel(&mut self, stop_tokens: &[&str])
pub fn remove_tokens_by_condition<F>(&mut self, condition: F) -> u64
pub fn get_sorted_by_frequency_desc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_frequency_desc_parallel(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_frequency_asc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_frequency_asc_parallel(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_alphabetical_asc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_alphabetical_asc_parallel(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_alphabetical_desc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_alphabetical_desc_parallel(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_length_desc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_length_desc_parallel(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_length_asc(&self) -> Vec<(String, u32)>
pub fn get_sorted_by_length_asc_parallel(&self) -> Vec<(String, u32)>
pub fn get_unique_token_ratio(&self) -> f64
pub fn reset(&mut self)
Trait Implementations§
Source§impl Clone for TokenFrequency
impl Clone for TokenFrequency
Source§fn clone(&self) -> TokenFrequency
fn clone(&self) -> TokenFrequency
Returns a copy of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for TokenFrequency
impl Debug for TokenFrequency
Source§impl<'de> Deserialize<'de> for TokenFrequency
impl<'de> Deserialize<'de> for TokenFrequency
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for TokenFrequency
impl RefUnwindSafe for TokenFrequency
impl Send for TokenFrequency
impl Sync for TokenFrequency
impl Unpin for TokenFrequency
impl UnwindSafe for TokenFrequency
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more