rammer/
lib.rs

1#![warn(missing_docs, missing_doc_code_examples)]
2//! Rammer, a play on Rust and the fact that spam is classified as Spam or Ham, is a spam/ham
3//! classification library.
4//!
5//! Here is an example program which trains and saves a new model for later use.
6//! ```no_run
7//! use rammer::{ HSModel, BagOfWords };
8//!
9//! fn main() {
10//!     let spam_bow = BagOfWords::from_folder("data/train/spam").expect("Folder not found");
11//!     let ham_bow = BagOfWords::from_folder("data/train/ham").expect("Folder not found");
12//!     let model = HSModel::from_bows(ham_bow, spam_bow);
13//!     model.write_to_json("out/models/enron1_model.json");
14//! }
15//! ```
16//!
17//! Here is an example program that uses an existing model.
18//! ```no_run
19//! use rammer::HSModel;
20//! use std::fs;
21//! use rayon::prelude::*;
22//! fn main() {
23//!    let model = HSModel::read_from_json("out/models/enron1_model.json").unwrap();
24//!    let spam_answers = validate(&model, "data/validate/spam", "spam", |p| p > 0.8);
25//!    let ham_answers = validate(&model, "data/validate/ham", "ham", |p| p < 0.2);
26//!
27//!    println!("Spam Correctly Classified: {}/{} = {:.4}", spam_answers.0, spam_answers.1, spam_answers.2);
28//!    println!("Ham Correctly Classified: {}/{} = {:.4}", ham_answers.0, ham_answers.1, ham_answers.2);
29//! }
30//!
31//! fn validate<F>(model: &HSModel, dir: &str, class: &str, is_correct: F) -> (u32, usize, f64)
32//!     where F: Fn(f64) -> bool + Sync
33//! {
34//!     let ps: Vec<bool> = fs::read_dir(dir)
35//!         .expect("folder exists")
36//!         .par_bridge()
37//!         .filter_map(|maybe_entry| {
38//!             maybe_entry.ok().and_then(|entry| {
39//!                 fs::read_to_string(entry.path())
40//!                     .ok()
41//!                     .and_then(|text| Some(model.text_spam_probability(&text[..])))
42//!             })
43//!         })
44//!         .map(|p| { println!("Probability: {:.8}\t\t({})", p, class); is_correct(p) })
45//!         .collect();
46//!
47//!     let num_classified_correctly: u32 = ps
48//!         .iter()
49//!         .filter_map(|&b| if b { Some(1) } else { None })
50//!         .sum();
51//!
52//!     (
53//!         num_classified_correctly,
54//!         ps.len(),
55//!         num_classified_correctly as f64 / ps.len() as f64
56//!     )
57//!
58//! }
59//! ```  
60
61mod bag_of_words;
62mod hs_model;
63pub use bag_of_words::BagOfWords;
64pub use hs_model::HSModel;
65
66/// Type alias for the rate of occurrences of a value.
67/// Values should always lie in the closed interval `[0, 1]`; as a plain alias of `f64`, the type itself does not enforce this.
68pub type Frequency = f64;
69
70/// Type alias for the statistical probability of an event.
71/// Values should always lie in the closed interval `[0, 1]`; as a plain alias of `f64`, the type itself does not enforce this.
72pub type Probability = f64;
73
74/// Type alias for the number of times a word is found in a [`BagOfWords`].
75pub type Count = u32;