//! # bayespam
//!
//! A simple Bayesian spam classifier.
//!
//! ## About
//!
//! Bayespam is inspired by [Naive Bayes classifiers](https://en.wikipedia.org/wiki/Naive_Bayes_spam_filtering), a popular statistical technique for e-mail filtering.
//!
//! Here, the message to be identified is split into simple words, also called tokens.
//! These tokens are compared against the whole corpus of messages (spam or not) to determine how frequently each token appears in both categories.
//!
//! A probabilistic formula is used to calculate the probability that the message is spam.
//! When the probability is high enough, the classifier categorizes the message as likely spam; otherwise, as likely ham.
//! The probability threshold is fixed at 0.8 by default.
//!
//! ## Usage
//!
//! Add to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! bayespam = "1.0.0"
//! ```
//!
//! And to your crate root:
//!
//! ```
//! extern crate bayespam;
//!
//! use bayespam::classifier::Classifier;
//! ```
//!
//! ### Use the pre-trained model provided
//!
//! ```
//! extern crate bayespam;
//!
//! use bayespam::classifier;
//!
//! fn main() -> Result<(), std::io::Error> {
//!     // Identify a typical spam message
//!     let spam = "Lose up to 19% weight. Special promotion on our new weightloss.";
//!     let is_spam = classifier::identify(spam)?;
//!     assert!(is_spam);
//!
//!     // Identify a typical ham message
//!     let ham = "Hi Bob, can you send me your machine learning homework?";
//!     let is_spam = classifier::identify(ham)?;
//!     assert!(!is_spam);
//!
//!     Ok(())
//! }
//! ```
//!
//! ### Train your own model
//!
//! ```
//! extern crate bayespam;
//!
//! use bayespam::classifier::Classifier;
//!
//! fn main() {
//!     // Create a new classifier with an empty model
//!     let mut classifier = Classifier::new();
//!
//!     // Train the classifier with a new spam example
//!     let spam = "Don't forget our special promotion: -30% on men shoes, only today!";
//!     classifier.train_spam(spam);
//!
//!     // Train the classifier with a new ham example
//!     let ham = "Hi Bob, don't forget our meeting today at 4pm.";
//!     classifier.train_ham(ham);
//!
//!     // Identify a typical spam message
//!     let spam = "Lose up to 19% weight. Special promotion on our new weightloss.";
//!     let is_spam = classifier.identify(spam);
//!     assert!(is_spam);
//!
//!     // Identify a typical ham message
//!     let ham = "Hi Bob, can you send me your machine learning homework?";
//!     let is_spam = classifier.identify(ham);
//!     assert!(!is_spam);
//! }
//! ```

pub mod classifier;