[−][src]Struct rammer::BagOfWords
A BagOfWords, also referred to as a bow, is a frequency map of words. Read more about the BagOfWords model here: BagOfWords Wikipedia. BagOfWords works with Unicode Words. Words are defined as the text between UAX#29 word boundaries. BagOfWords is serializable using one of the serde serialization crates.
use rammer::BagOfWords; use serde_json; let singly_trained_bow = BagOfWords::from_file("test_resources/test_data/unicode_and_ascii.txt").unwrap(); let big_bow = BagOfWords::from_folder("data/train/ham"); let com_bow = singly_trained_bow.combine(big_bow);
Implementations
impl BagOfWords
[src]
pub fn new() -> Self
[src]
Return a new BagOfWords with an empty Frequency Map.
let empty_bow = BagOfWords::new();
pub fn from_file(file_path: &str) -> Option<Self>
[src]
Create a BagOfWords from a text file. This file should already be known to be ham or spam. The text file will be the basis of a new HSModel's Ham/Spam BagOfWords.
let spam_bow = BagOfWords::from_file("test_resources/test_data/unicode_and_ascii.txt").unwrap();
pub fn from_folder(dir_path: &str) -> Self
[src]
Create a BagOfWords from a folder containing either spam training text files, or ham training text files.
let spam_bow = BagOfWords::from_folder("data/train/spam");
pub fn combine(self, other: Self) -> Self
[src]
Combines two BagOfWords into a new BagOfWords. Frequencies of words found in both bags are additive. This operation is commutative and associative. These properties can be used to dynamically grow your training BagOfWords.
let ham_bow_1 = BagOfWords::from("Hello there world"); // Creates: {HELLO: 1, THERE: 1, WORLD: 1} let ham_bow_2 = BagOfWords::from("howdy there guy"); // Creates: {HOWDY: 1, THERE: 1, GUY: 1} let com_bow = ham_bow_1.combine(ham_bow_2); // Combines to: {HELLO: 1, THERE: 2, HOWDY: 1, ...}
pub fn total_word_count(&self) -> Count
[src]
Get the sum of all the Counts in a BagOfWords. Used internally for frequency calculations.
ham_bow.total_word_count(); // returns a sum of Counts.
pub fn word_frequency(&self, word: &str) -> Option<Frequency>
[src]
Calculates the Frequency of a word in the BagOfWords by taking count_of_a_word / total_word_count. This will return None if the word slice passed contains multiple words.
let ham_bow = BagOfWords::from("hello there how are you"); ham_bow.word_frequency("hello"); //returns 0.2 ham_bow.word_frequency("hello there"); //returns None
Trait Implementations
impl Debug for BagOfWords
[src]
impl<'de> Deserialize<'de> for BagOfWords
[src]
pub fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
impl Eq for BagOfWords
[src]
impl<'_> From<&'_ str> for BagOfWords
[src]
Converts a &str to a bag of words. To create BagOfWords models, consider using from_file or from_folder instead.
let bow = BagOfWords::from("hello world WOrLD"); // creates {HELLO: 1, WORLD: 2}
pub fn from(s: &str) -> BagOfWords
[src]
impl FromIterator<BagOfWords> for BagOfWords
[src]
Use .collect() over an iterator of BagOfWords to additively combine them with combine.
let bow: BagOfWords = vec![ BagOfWords::from("hi"), BagOfWords::new(), BagOfWords::from("Big sale!")] .into_iter().collect();
pub fn from_iter<I: IntoIterator<Item = BagOfWords>>(iter: I) -> Self
[src]
impl PartialEq<BagOfWords> for BagOfWords
[src]
pub fn eq(&self, other: &BagOfWords) -> bool
[src]
pub fn ne(&self, other: &BagOfWords) -> bool
[src]
impl Serialize for BagOfWords
[src]
pub fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where
__S: Serializer,
[src]
__S: Serializer,
impl StructuralEq for BagOfWords
[src]
impl StructuralPartialEq for BagOfWords
[src]
Auto Trait Implementations
impl RefUnwindSafe for BagOfWords
[src]
impl Send for BagOfWords
[src]
impl Sync for BagOfWords
[src]
impl Unpin for BagOfWords
[src]
impl UnwindSafe for BagOfWords
[src]
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
[src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,