pub struct Model<T: ModelStore + Sync> { /* private fields */ }
Implementations
impl<T: ModelStore + Sync> Model<T>
pub fn with_stop_words_file(self, stop_words_file: &str) -> Self
Examples found in repository?
examples/20newsgroup_stopwords/main.rs (line 11)
9fn main() {
10 let mut model = Model::new()
11 .with_stop_words_file("examples/data/english-stop-words-large.txt")
12 .with_pseudo_count(0.1);
13
14 let train_data = load_txt("examples/data/20newsgroup_train.txt");
15 let test_data = load_txt("examples/data/20newsgroup_test.txt");
16 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19 println!(
20 "Train size: {}, test size: {}",
21 train_data.len(),
22 test_labels.len()
23 );
24
25 model.train("20newsgroup_model", &train_data);
26 println!("Training finished");
27
28 let predicts = model.predict_batch("20newsgroup_model", &test_features);
29 println!("Testing finished");
30
31 let total_test_score: f64 = test_labels
32 .iter()
33 .zip(predicts.iter())
34 .map(|(test_label, predict)| {
35 let (pred_label, _test_score) = predict
36 .iter()
37 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38 .unwrap();
39 if test_label == pred_label {
40 1.0
41 } else {
42 0.0
43 }
44 })
45 .sum();
46 let score = total_test_score / test_labels.len() as f64;
47
48 println!("test score: {}", score);
49 assert!((0.66 - score).abs() < 1e-3);
50}
pub fn with_default_gaussian_m2(self, default_gaussian_m2: f64) -> Self
Examples found in repository?
examples/gaussian_mom/main.rs (line 6)
5fn main() {
6 let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Gaussian,
14 name: "weather.degree".to_owned(),
15 value: "32".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "weather.title".to_owned(),
20 value: "sunny".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "weather.wind.level".to_owned(),
25 value: "3".to_owned(),
26 },
27 ],
28 ),
29 (
30 "go play well".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Gaussian,
34 name: "weather.degree".to_owned(),
35 value: "24".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "weather.title".to_owned(),
40 value: "cloudy".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "weather.wind.level".to_owned(),
45 value: "1".to_owned(),
46 },
47 ],
48 ),
49 (
50 "take umbrella".to_owned(),
51 vec![
52 Feature {
53 feature_type: FeatureType::Gaussian,
54 name: "weather.degree".to_owned(),
55 value: "5".to_owned(),
56 },
57 Feature {
58 feature_type: FeatureType::Category,
59 name: "weather.title".to_owned(),
60 value: "rainy".to_owned(),
61 },
62 Feature {
63 feature_type: FeatureType::Gaussian,
64 name: "weather.wind.level".to_owned(),
65 value: "3".to_owned(),
66 },
67 ],
68 ),
69 (
70 "take umbrella".to_owned(),
71 vec![
72 Feature {
73 feature_type: FeatureType::Gaussian,
74 name: "weather.degree".to_owned(),
75 value: "25".to_owned(),
76 },
77 Feature {
78 feature_type: FeatureType::Category,
79 name: "weather.title".to_owned(),
80 value: "rainy".to_owned(),
81 },
82 Feature {
83 feature_type: FeatureType::Gaussian,
84 name: "weather.wind.level".to_owned(),
85 value: "4".to_owned(),
86 },
87 ],
88 ),
89 (
90 "wear more cloth".to_owned(),
91 vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "-2".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "cloudy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "3".to_owned(),
106 },
107 ],
108 ),
109 (
110 "wear more cloth".to_owned(),
111 vec![
112 Feature {
113 feature_type: FeatureType::Gaussian,
114 name: "weather.degree".to_owned(),
115 value: "2".to_owned(),
116 },
117 Feature {
118 feature_type: FeatureType::Category,
119 name: "weather.title".to_owned(),
120 value: "sunny".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Gaussian,
124 name: "weather.wind.level".to_owned(),
125 value: "3".to_owned(),
126 },
127 ],
128 ),
129 ];
130
131 model.train("Mom's word to me before I go out", &input_train);
132
133 // test example 1
134 let result = model.predict(
135 "Mom's word to me before I go out",
136 &vec![
137 Feature {
138 feature_type: FeatureType::Gaussian,
139 name: "weather.degree".to_owned(),
140 value: "0.0".to_owned(),
141 },
142 Feature {
143 feature_type: FeatureType::Category,
144 name: "weather.title".to_owned(),
145 value: "sunny".to_owned(),
146 },
147 Feature {
148 feature_type: FeatureType::Gaussian,
149 name: "weather.wind.level".to_owned(),
150 value: "2".to_owned(),
151 },
152 ],
153 );
154
155 println!("{:?}\n", result);
156 assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157 // result will be:
158 // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160 // test example 2
161 let result = model.predict(
162 "Mom's word to me before I go out",
163 &vec![
164 Feature {
165 feature_type: FeatureType::Gaussian,
166 name: "weather.degree".to_owned(),
167 value: "28".to_owned(),
168 },
169 Feature {
170 feature_type: FeatureType::Category,
171 name: "weather.title".to_owned(),
172 value: "rainy".to_owned(),
173 },
174 Feature {
175 feature_type: FeatureType::Gaussian,
176 name: "weather.wind.level".to_owned(),
177 value: "5".to_owned(),
178 },
179 ],
180 );
181
182 println!("{:?}\n", result);
183 assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184 // result will be:
185 // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187 // test example 3
188 let result = model.predict(
189 "Mom's word to me before I go out",
190 &vec![
191 Feature {
192 feature_type: FeatureType::Gaussian,
193 name: "weather.degree".to_owned(),
194 value: "25".to_owned(),
195 },
196 Feature {
197 feature_type: FeatureType::Category,
198 name: "weather.title".to_owned(),
199 value: "cloudy".to_owned(),
200 },
201 Feature {
202 feature_type: FeatureType::Gaussian,
203 name: "weather.wind.level".to_owned(),
204 value: "3".to_owned(),
205 },
206 ],
207 );
208
209 println!("{:?}\n", result);
210 assert!(result.get("go play well").unwrap().abs() > 0.5);
211 // result will be:
212 // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
pub fn with_pseudo_count(self, pseudo_count: f64) -> Self
Examples found in repository?
examples/uci_adult/main.rs (line 29)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
More examples
examples/20newsgroup_stopwords/main.rs (line 12)
9fn main() {
10 let mut model = Model::new()
11 .with_stop_words_file("examples/data/english-stop-words-large.txt")
12 .with_pseudo_count(0.1);
13
14 let train_data = load_txt("examples/data/20newsgroup_train.txt");
15 let test_data = load_txt("examples/data/20newsgroup_test.txt");
16 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19 println!(
20 "Train size: {}, test size: {}",
21 train_data.len(),
22 test_labels.len()
23 );
24
25 model.train("20newsgroup_model", &train_data);
26 println!("Training finished");
27
28 let predicts = model.predict_batch("20newsgroup_model", &test_features);
29 println!("Testing finished");
30
31 let total_test_score: f64 = test_labels
32 .iter()
33 .zip(predicts.iter())
34 .map(|(test_label, predict)| {
35 let (pred_label, _test_score) = predict
36 .iter()
37 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38 .unwrap();
39 if test_label == pred_label {
40 1.0
41 } else {
42 0.0
43 }
44 })
45 .sum();
46 let score = total_test_score / test_labels.len() as f64;
47
48 println!("test score: {}", score);
49 assert!((0.66 - score).abs() < 1e-3);
50}
pub fn with_prior_factor(self, prior_factor: f64) -> Self
Examples found in repository?
examples/uci_adult/main.rs (line 28)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
pub fn with_default_gaussian_sigma_factor(
    self,
    default_gaussian_sigma_factor: f64,
) -> Self
Examples found in repository?
examples/uci_adult/main.rs (line 30)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
pub fn train(
    &mut self,
    model_name: &str,
    class_feature_pairs: &[(String, Vec<Feature>)],
)
Examples found in repository?
examples/20newsgroup/main.rs (line 26)
12fn main() {
13 let mut model = Model::new();
14
15 let train_data = load_txt("examples/data/20newsgroup_train.txt");
16 let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18 let test_data_len = test_data.len();
19
20 println!(
21 "Train size: {}, test size: {}",
22 train_data.len(),
23 test_data.len()
24 );
25
26 model.train("20newsgroup_model", &train_data);
27 println!("Training finished");
28
29 let total_test_score: f64 = test_data
30 .into_par_iter()
31 .map(|(test_label, features)| {
32 let predict = model.predict("20newsgroup_model", &features);
33 let (pred_label, _test_score) = predict
34 .iter()
35 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36 .unwrap();
37 if &test_label == pred_label {
38 1.0
39 } else {
40 0.0
41 }
42 })
43 .sum();
44
45 println!("Testing finished");
46
47 let score = total_test_score / test_data_len as f64;
48
49 println!("test score: {}", score);
50 assert!((0.5771375464684015 - score).abs() < 1e-10);
51 // old master gives 0.5785979819437068
52 // blayze gives 0.5770609318996416
53 // pblayze gives 0.5770609318996416
54 // python 0.5779341
55}
More examples
examples/uci_adult/main.rs (line 43)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
examples/20newsgroup_stopwords/main.rs (line 25)
9fn main() {
10 let mut model = Model::new()
11 .with_stop_words_file("examples/data/english-stop-words-large.txt")
12 .with_pseudo_count(0.1);
13
14 let train_data = load_txt("examples/data/20newsgroup_train.txt");
15 let test_data = load_txt("examples/data/20newsgroup_test.txt");
16 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19 println!(
20 "Train size: {}, test size: {}",
21 train_data.len(),
22 test_labels.len()
23 );
24
25 model.train("20newsgroup_model", &train_data);
26 println!("Training finished");
27
28 let predicts = model.predict_batch("20newsgroup_model", &test_features);
29 println!("Testing finished");
30
31 let total_test_score: f64 = test_labels
32 .iter()
33 .zip(predicts.iter())
34 .map(|(test_label, predict)| {
35 let (pred_label, _test_score) = predict
36 .iter()
37 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38 .unwrap();
39 if test_label == pred_label {
40 1.0
41 } else {
42 0.0
43 }
44 })
45 .sum();
46 let score = total_test_score / test_labels.len() as f64;
47
48 println!("test score: {}", score);
49 assert!((0.66 - score).abs() < 1e-3);
50}
examples/spam/main.rs (line 51)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "spam".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Text,
14 name: "email.body".to_owned(),
15 value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "email.domain".to_owned(),
20 value: "evil.com".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "email.n_words".to_owned(),
25 value: "482".to_owned(),
26 },
27 ],
28 ),
29 (
30 "not spam".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Text,
34 name: "email.body".to_owned(),
35 value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "email.domain".to_owned(),
40 value: "gmail.com".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "email.n_words".to_owned(),
45 value: "42".to_owned(),
46 },
47 ],
48 ),
49 ];
50
51 model.train("Spam checker", &input_train);
52
53 // test example 1
54 let result = model.predict(
55 "Spam checker",
56 &vec![
57 Feature {
58 feature_type: FeatureType::Text,
59 name: "email.body".to_owned(),
60 value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61 .to_owned(),
62 },
63 Feature {
64 feature_type: FeatureType::Category,
65 name: "email.domain".to_owned(),
66 value: "example.com".to_owned(),
67 },
68 Feature {
69 feature_type: FeatureType::Gaussian,
70 name: "email.n_words".to_owned(),
71 value: "288".to_owned(),
72 },
73 ],
74 );
75
76 println!("{:?}\n", result);
77 assert!(result.get("spam").unwrap().abs() > 0.9);
78 // result will be:
79 // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81 // test example 2
82 let result = model.predict(
83 "Spam checker",
84 &vec![
85 Feature {
86 feature_type: FeatureType::Text,
87 name: "email.body".to_owned(),
88 value: "Hey bro, hotpot again?".to_owned(),
89 },
90 Feature {
91 feature_type: FeatureType::Category,
92 name: "email.domain".to_owned(),
93 value: "gmail.com".to_owned(),
94 },
95 Feature {
96 feature_type: FeatureType::Gaussian,
97 name: "email.n_words".to_owned(),
98 value: "10".to_owned(),
99 },
100 ],
101 );
102
103 println!("{:?}\n", result);
104 assert!(result.get("not spam").unwrap().abs() > 0.9);
105 // result will be:
106 // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (line 58)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![Feature {
12 feature_type: FeatureType::Category,
13 name: "weather.title".to_owned(),
14 value: "sunny".to_owned(),
15 }],
16 ),
17 (
18 "go play well".to_owned(),
19 vec![Feature {
20 feature_type: FeatureType::Category,
21 name: "weather.title".to_owned(),
22 value: "cloudy".to_owned(),
23 }],
24 ),
25 (
26 "take umbrella".to_owned(),
27 vec![Feature {
28 feature_type: FeatureType::Category,
29 name: "weather.title".to_owned(),
30 value: "rainy".to_owned(),
31 }],
32 ),
33 (
34 "take umbrella".to_owned(),
35 vec![Feature {
36 feature_type: FeatureType::Category,
37 name: "weather.title".to_owned(),
38 value: "rainy".to_owned(),
39 }],
40 ),
41 (
42 "wear more cloth".to_owned(),
43 vec![Feature {
44 feature_type: FeatureType::Category,
45 name: "weather.title".to_owned(),
46 value: "cloudy".to_owned(),
47 }],
48 ),
49 (
50 "wear more cloth".to_owned(),
51 vec![Feature {
52 feature_type: FeatureType::Category,
53 name: "weather.title".to_owned(),
54 value: "sunny".to_owned(),
55 }],
56 ),
57 ];
58 model.train("Mom's word to me before I go out", &input_train);
59
60 // note: the model is trained only with "weather.title" as a category feature,
61 // the following prediction tests will only take into account the "weather.title" feature in the input
62 // test example 1
63 let result = model.predict(
64 "Mom's word to me before I go out",
65 &vec![
66 Feature {
67 feature_type: FeatureType::Gaussian,
68 name: "weather.degree".to_owned(),
69 value: "0.0".to_owned(),
70 },
71 Feature {
72 feature_type: FeatureType::Category,
73 name: "weather.title".to_owned(),
74 value: "sunny".to_owned(),
75 },
76 Feature {
77 feature_type: FeatureType::Gaussian,
78 name: "weather.wind.level".to_owned(),
79 value: "2".to_owned(),
80 },
81 ],
82 );
83
84 println!("{:?}\n", result);
85 assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86 assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88 // test example 2
89 let result = model.predict(
90 "Mom's word to me before I go out",
91 &vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "22".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "rainy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "5".to_owned(),
106 },
107 ],
108 );
109
110 println!("{:?}\n", result);
111 assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113 // test example 3
114 let result = model.predict(
115 "Mom's word to me before I go out",
116 &vec![
117 Feature {
118 feature_type: FeatureType::Gaussian,
119 name: "weather.degree".to_owned(),
120 value: "25".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Category,
124 name: "weather.title".to_owned(),
125 value: "cloudy".to_owned(),
126 },
127 Feature {
128 feature_type: FeatureType::Gaussian,
129 name: "weather.wind.level".to_owned(),
130 value: "3".to_owned(),
131 },
132 ],
133 );
134
135 println!("{:?}\n", result);
136 assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137 assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (line 131)
5fn main() {
6 let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Gaussian,
14 name: "weather.degree".to_owned(),
15 value: "32".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "weather.title".to_owned(),
20 value: "sunny".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "weather.wind.level".to_owned(),
25 value: "3".to_owned(),
26 },
27 ],
28 ),
29 (
30 "go play well".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Gaussian,
34 name: "weather.degree".to_owned(),
35 value: "24".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "weather.title".to_owned(),
40 value: "cloudy".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "weather.wind.level".to_owned(),
45 value: "1".to_owned(),
46 },
47 ],
48 ),
49 (
50 "take umbrella".to_owned(),
51 vec![
52 Feature {
53 feature_type: FeatureType::Gaussian,
54 name: "weather.degree".to_owned(),
55 value: "5".to_owned(),
56 },
57 Feature {
58 feature_type: FeatureType::Category,
59 name: "weather.title".to_owned(),
60 value: "rainy".to_owned(),
61 },
62 Feature {
63 feature_type: FeatureType::Gaussian,
64 name: "weather.wind.level".to_owned(),
65 value: "3".to_owned(),
66 },
67 ],
68 ),
69 (
70 "take umbrella".to_owned(),
71 vec![
72 Feature {
73 feature_type: FeatureType::Gaussian,
74 name: "weather.degree".to_owned(),
75 value: "25".to_owned(),
76 },
77 Feature {
78 feature_type: FeatureType::Category,
79 name: "weather.title".to_owned(),
80 value: "rainy".to_owned(),
81 },
82 Feature {
83 feature_type: FeatureType::Gaussian,
84 name: "weather.wind.level".to_owned(),
85 value: "4".to_owned(),
86 },
87 ],
88 ),
89 (
90 "wear more cloth".to_owned(),
91 vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "-2".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "cloudy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "3".to_owned(),
106 },
107 ],
108 ),
109 (
110 "wear more cloth".to_owned(),
111 vec![
112 Feature {
113 feature_type: FeatureType::Gaussian,
114 name: "weather.degree".to_owned(),
115 value: "2".to_owned(),
116 },
117 Feature {
118 feature_type: FeatureType::Category,
119 name: "weather.title".to_owned(),
120 value: "sunny".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Gaussian,
124 name: "weather.wind.level".to_owned(),
125 value: "3".to_owned(),
126 },
127 ],
128 ),
129 ];
130
131 model.train("Mom's word to me before I go out", &input_train);
132
133 // test example 1
134 let result = model.predict(
135 "Mom's word to me before I go out",
136 &vec![
137 Feature {
138 feature_type: FeatureType::Gaussian,
139 name: "weather.degree".to_owned(),
140 value: "0.0".to_owned(),
141 },
142 Feature {
143 feature_type: FeatureType::Category,
144 name: "weather.title".to_owned(),
145 value: "sunny".to_owned(),
146 },
147 Feature {
148 feature_type: FeatureType::Gaussian,
149 name: "weather.wind.level".to_owned(),
150 value: "2".to_owned(),
151 },
152 ],
153 );
154
155 println!("{:?}\n", result);
156 assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157 // result will be:
158 // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160 // test example 2
161 let result = model.predict(
162 "Mom's word to me before I go out",
163 &vec![
164 Feature {
165 feature_type: FeatureType::Gaussian,
166 name: "weather.degree".to_owned(),
167 value: "28".to_owned(),
168 },
169 Feature {
170 feature_type: FeatureType::Category,
171 name: "weather.title".to_owned(),
172 value: "rainy".to_owned(),
173 },
174 Feature {
175 feature_type: FeatureType::Gaussian,
176 name: "weather.wind.level".to_owned(),
177 value: "5".to_owned(),
178 },
179 ],
180 );
181
182 println!("{:?}\n", result);
183 assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184 // result will be:
185 // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187 // test example 3
188 let result = model.predict(
189 "Mom's word to me before I go out",
190 &vec![
191 Feature {
192 feature_type: FeatureType::Gaussian,
193 name: "weather.degree".to_owned(),
194 value: "25".to_owned(),
195 },
196 Feature {
197 feature_type: FeatureType::Category,
198 name: "weather.title".to_owned(),
199 value: "cloudy".to_owned(),
200 },
201 Feature {
202 feature_type: FeatureType::Gaussian,
203 name: "weather.wind.level".to_owned(),
204 value: "3".to_owned(),
205 },
206 ],
207 );
208
209 println!("{:?}\n", result);
210 assert!(result.get("go play well").unwrap().abs() > 0.5);
211 // result will be:
212 // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
pub fn predict(
    &self,
    model_name: &str,
    features: &[Feature],
) -> HashMap<String, f64>
Examples found in repository?
examples/20newsgroup/main.rs (line 32)
12fn main() {
13 let mut model = Model::new();
14
15 let train_data = load_txt("examples/data/20newsgroup_train.txt");
16 let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18 let test_data_len = test_data.len();
19
20 println!(
21 "Train size: {}, test size: {}",
22 train_data.len(),
23 test_data.len()
24 );
25
26 model.train("20newsgroup_model", &train_data);
27 println!("Training finished");
28
29 let total_test_score: f64 = test_data
30 .into_par_iter()
31 .map(|(test_label, features)| {
32 let predict = model.predict("20newsgroup_model", &features);
33 let (pred_label, _test_score) = predict
34 .iter()
35 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36 .unwrap();
37 if &test_label == pred_label {
38 1.0
39 } else {
40 0.0
41 }
42 })
43 .sum();
44
45 println!("Testing finished");
46
47 let score = total_test_score / test_data_len as f64;
48
49 println!("test score: {}", score);
50 assert!((0.5771375464684015 - score).abs() < 1e-10);
51 // old master gives 0.5785979819437068
52 // blayze gives 0.5770609318996416
53 // pblayze gives 0.5770609318996416
54 // python 0.5779341
55}
More examples
examples/spam/main.rs (lines 54-74)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "spam".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Text,
14 name: "email.body".to_owned(),
15 value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "email.domain".to_owned(),
20 value: "evil.com".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "email.n_words".to_owned(),
25 value: "482".to_owned(),
26 },
27 ],
28 ),
29 (
30 "not spam".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Text,
34 name: "email.body".to_owned(),
35 value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "email.domain".to_owned(),
40 value: "gmail.com".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "email.n_words".to_owned(),
45 value: "42".to_owned(),
46 },
47 ],
48 ),
49 ];
50
51 model.train("Spam checker", &input_train);
52
53 // test example 1
54 let result = model.predict(
55 "Spam checker",
56 &vec![
57 Feature {
58 feature_type: FeatureType::Text,
59 name: "email.body".to_owned(),
60 value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61 .to_owned(),
62 },
63 Feature {
64 feature_type: FeatureType::Category,
65 name: "email.domain".to_owned(),
66 value: "example.com".to_owned(),
67 },
68 Feature {
69 feature_type: FeatureType::Gaussian,
70 name: "email.n_words".to_owned(),
71 value: "288".to_owned(),
72 },
73 ],
74 );
75
76 println!("{:?}\n", result);
77 assert!(result.get("spam").unwrap().abs() > 0.9);
78 // result will be:
79 // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81 // test example 2
82 let result = model.predict(
83 "Spam checker",
84 &vec![
85 Feature {
86 feature_type: FeatureType::Text,
87 name: "email.body".to_owned(),
88 value: "Hey bro, hotpot again?".to_owned(),
89 },
90 Feature {
91 feature_type: FeatureType::Category,
92 name: "email.domain".to_owned(),
93 value: "gmail.com".to_owned(),
94 },
95 Feature {
96 feature_type: FeatureType::Gaussian,
97 name: "email.n_words".to_owned(),
98 value: "10".to_owned(),
99 },
100 ],
101 );
102
103 println!("{:?}\n", result);
104 assert!(result.get("not spam").unwrap().abs() > 0.9);
105 // result will be:
106 // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (lines 63-82)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![Feature {
12 feature_type: FeatureType::Category,
13 name: "weather.title".to_owned(),
14 value: "sunny".to_owned(),
15 }],
16 ),
17 (
18 "go play well".to_owned(),
19 vec![Feature {
20 feature_type: FeatureType::Category,
21 name: "weather.title".to_owned(),
22 value: "cloudy".to_owned(),
23 }],
24 ),
25 (
26 "take umbrella".to_owned(),
27 vec![Feature {
28 feature_type: FeatureType::Category,
29 name: "weather.title".to_owned(),
30 value: "rainy".to_owned(),
31 }],
32 ),
33 (
34 "take umbrella".to_owned(),
35 vec![Feature {
36 feature_type: FeatureType::Category,
37 name: "weather.title".to_owned(),
38 value: "rainy".to_owned(),
39 }],
40 ),
41 (
42 "wear more cloth".to_owned(),
43 vec![Feature {
44 feature_type: FeatureType::Category,
45 name: "weather.title".to_owned(),
46 value: "cloudy".to_owned(),
47 }],
48 ),
49 (
50 "wear more cloth".to_owned(),
51 vec![Feature {
52 feature_type: FeatureType::Category,
53 name: "weather.title".to_owned(),
54 value: "sunny".to_owned(),
55 }],
56 ),
57 ];
58 model.train("Mom's word to me before I go out", &input_train);
59
60 // note: the model is trained with only "weather.title" as a category feature, so
61 // the following prediction tests will only take the "weather.title" feature of the input into account
62 // test example 1
63 let result = model.predict(
64 "Mom's word to me before I go out",
65 &vec![
66 Feature {
67 feature_type: FeatureType::Gaussian,
68 name: "weather.degree".to_owned(),
69 value: "0.0".to_owned(),
70 },
71 Feature {
72 feature_type: FeatureType::Category,
73 name: "weather.title".to_owned(),
74 value: "sunny".to_owned(),
75 },
76 Feature {
77 feature_type: FeatureType::Gaussian,
78 name: "weather.wind.level".to_owned(),
79 value: "2".to_owned(),
80 },
81 ],
82 );
83
84 println!("{:?}\n", result);
85 assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86 assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88 // test example 2
89 let result = model.predict(
90 "Mom's word to me before I go out",
91 &vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "22".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "rainy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "5".to_owned(),
106 },
107 ],
108 );
109
110 println!("{:?}\n", result);
111 assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113 // test example 3
114 let result = model.predict(
115 "Mom's word to me before I go out",
116 &vec![
117 Feature {
118 feature_type: FeatureType::Gaussian,
119 name: "weather.degree".to_owned(),
120 value: "25".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Category,
124 name: "weather.title".to_owned(),
125 value: "cloudy".to_owned(),
126 },
127 Feature {
128 feature_type: FeatureType::Gaussian,
129 name: "weather.wind.level".to_owned(),
130 value: "3".to_owned(),
131 },
132 ],
133 );
134
135 println!("{:?}\n", result);
136 assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137 assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (lines 134-153)
5fn main() {
6 let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Gaussian,
14 name: "weather.degree".to_owned(),
15 value: "32".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "weather.title".to_owned(),
20 value: "sunny".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "weather.wind.level".to_owned(),
25 value: "3".to_owned(),
26 },
27 ],
28 ),
29 (
30 "go play well".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Gaussian,
34 name: "weather.degree".to_owned(),
35 value: "24".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "weather.title".to_owned(),
40 value: "cloudy".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "weather.wind.level".to_owned(),
45 value: "1".to_owned(),
46 },
47 ],
48 ),
49 (
50 "take umbrella".to_owned(),
51 vec![
52 Feature {
53 feature_type: FeatureType::Gaussian,
54 name: "weather.degree".to_owned(),
55 value: "5".to_owned(),
56 },
57 Feature {
58 feature_type: FeatureType::Category,
59 name: "weather.title".to_owned(),
60 value: "rainy".to_owned(),
61 },
62 Feature {
63 feature_type: FeatureType::Gaussian,
64 name: "weather.wind.level".to_owned(),
65 value: "3".to_owned(),
66 },
67 ],
68 ),
69 (
70 "take umbrella".to_owned(),
71 vec![
72 Feature {
73 feature_type: FeatureType::Gaussian,
74 name: "weather.degree".to_owned(),
75 value: "25".to_owned(),
76 },
77 Feature {
78 feature_type: FeatureType::Category,
79 name: "weather.title".to_owned(),
80 value: "rainy".to_owned(),
81 },
82 Feature {
83 feature_type: FeatureType::Gaussian,
84 name: "weather.wind.level".to_owned(),
85 value: "4".to_owned(),
86 },
87 ],
88 ),
89 (
90 "wear more cloth".to_owned(),
91 vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "-2".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "cloudy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "3".to_owned(),
106 },
107 ],
108 ),
109 (
110 "wear more cloth".to_owned(),
111 vec![
112 Feature {
113 feature_type: FeatureType::Gaussian,
114 name: "weather.degree".to_owned(),
115 value: "2".to_owned(),
116 },
117 Feature {
118 feature_type: FeatureType::Category,
119 name: "weather.title".to_owned(),
120 value: "sunny".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Gaussian,
124 name: "weather.wind.level".to_owned(),
125 value: "3".to_owned(),
126 },
127 ],
128 ),
129 ];
130
131 model.train("Mom's word to me before I go out", &input_train);
132
133 // test example 1
134 let result = model.predict(
135 "Mom's word to me before I go out",
136 &vec![
137 Feature {
138 feature_type: FeatureType::Gaussian,
139 name: "weather.degree".to_owned(),
140 value: "0.0".to_owned(),
141 },
142 Feature {
143 feature_type: FeatureType::Category,
144 name: "weather.title".to_owned(),
145 value: "sunny".to_owned(),
146 },
147 Feature {
148 feature_type: FeatureType::Gaussian,
149 name: "weather.wind.level".to_owned(),
150 value: "2".to_owned(),
151 },
152 ],
153 );
154
155 println!("{:?}\n", result);
156 assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157 // result will be:
158 // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160 // test example 2
161 let result = model.predict(
162 "Mom's word to me before I go out",
163 &vec![
164 Feature {
165 feature_type: FeatureType::Gaussian,
166 name: "weather.degree".to_owned(),
167 value: "28".to_owned(),
168 },
169 Feature {
170 feature_type: FeatureType::Category,
171 name: "weather.title".to_owned(),
172 value: "rainy".to_owned(),
173 },
174 Feature {
175 feature_type: FeatureType::Gaussian,
176 name: "weather.wind.level".to_owned(),
177 value: "5".to_owned(),
178 },
179 ],
180 );
181
182 println!("{:?}\n", result);
183 assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184 // result will be:
185 // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187 // test example 3
188 let result = model.predict(
189 "Mom's word to me before I go out",
190 &vec![
191 Feature {
192 feature_type: FeatureType::Gaussian,
193 name: "weather.degree".to_owned(),
194 value: "25".to_owned(),
195 },
196 Feature {
197 feature_type: FeatureType::Category,
198 name: "weather.title".to_owned(),
199 value: "cloudy".to_owned(),
200 },
201 Feature {
202 feature_type: FeatureType::Gaussian,
203 name: "weather.wind.level".to_owned(),
204 value: "3".to_owned(),
205 },
206 ],
207 );
208
209 println!("{:?}\n", result);
210 assert!(result.get("go play well").unwrap().abs() > 0.5);
211 // result will be:
212 // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
Sourcepub fn predict_batch(
&self,
model_name: &str,
features_vec: &[Vec<Feature>],
) -> Vec<HashMap<String, f64>>
pub fn predict_batch( &self, model_name: &str, features_vec: &[Vec<Feature>], ) -> Vec<HashMap<String, f64>>
Examples found in repository?
examples/uci_adult/main.rs (line 46)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
More examples
examples/20newsgroup_stopwords/main.rs (line 28)
9fn main() {
10 let mut model = Model::new()
11 .with_stop_words_file("examples/data/english-stop-words-large.txt")
12 .with_pseudo_count(0.1);
13
14 let train_data = load_txt("examples/data/20newsgroup_train.txt");
15 let test_data = load_txt("examples/data/20newsgroup_test.txt");
16 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19 println!(
20 "Train size: {}, test size: {}",
21 train_data.len(),
22 test_labels.len()
23 );
24
25 model.train("20newsgroup_model", &train_data);
26 println!("Training finished");
27
28 let predicts = model.predict_batch("20newsgroup_model", &test_features);
29 println!("Testing finished");
30
31 let total_test_score: f64 = test_labels
32 .iter()
33 .zip(predicts.iter())
34 .map(|(test_label, predict)| {
35 let (pred_label, _test_score) = predict
36 .iter()
37 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38 .unwrap();
39 if test_label == pred_label {
40 1.0
41 } else {
42 0.0
43 }
44 })
45 .sum();
46 let score = total_test_score / test_labels.len() as f64;
47
48 println!("test score: {}", score);
49 assert!((0.66 - score).abs() < 1e-3);
50}
Source§impl Model<ModelHashMapStore>
impl Model<ModelHashMapStore>
Sourcepub fn new() -> Model<ModelHashMapStore>
pub fn new() -> Model<ModelHashMapStore>
Examples found in repository?
examples/20newsgroup/main.rs (line 13)
12fn main() {
13 let mut model = Model::new();
14
15 let train_data = load_txt("examples/data/20newsgroup_train.txt");
16 let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18 let test_data_len = test_data.len();
19
20 println!(
21 "Train size: {}, test size: {}",
22 train_data.len(),
23 test_data.len()
24 );
25
26 model.train("20newsgroup_model", &train_data);
27 println!("Training finished");
28
29 let total_test_score: f64 = test_data
30 .into_par_iter()
31 .map(|(test_label, features)| {
32 let predict = model.predict("20newsgroup_model", &features);
33 let (pred_label, _test_score) = predict
34 .iter()
35 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36 .unwrap();
37 if &test_label == pred_label {
38 1.0
39 } else {
40 0.0
41 }
42 })
43 .sum();
44
45 println!("Testing finished");
46
47 let score = total_test_score / test_data_len as f64;
48
49 println!("test score: {}", score);
50 assert!((0.5771375464684015 - score).abs() < 1e-10);
51 // old master gives 0.5785979819437068
52 // blayze gives 0.5770609318996416
53 // pblayze gives 0.5770609318996416
54 // python 0.5779341
55}
More examples
examples/uci_adult/main.rs (line 27)
26fn main() {
27 let mut model = Model::new()
28 .with_prior_factor(1.0)
29 .with_pseudo_count(0.1)
30 .with_default_gaussian_sigma_factor(0.05);
31
32 let train_data = load_txt("examples/data/adult.data");
33 let test_data = load_txt("examples/data/adult.test");
34 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37 println!(
38 "Train size: {}, test size: {}",
39 train_data.len(),
40 test_features.len()
41 );
42
43 model.train("uci_adult", &train_data);
44 println!("Training finished");
45
46 let predicts = model.predict_batch("uci_adult", &test_features);
47 println!("Testing finished");
48
49 let total_test_score: f64 = test_labels
50 .iter()
51 .zip(predicts.iter())
52 .map(|(test_label, predict)| {
53 let (pred_label, _test_score) = predict
54 .iter()
55 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56 .unwrap();
57
58 if test_label == pred_label {
59 1.0
60 } else {
61 0.0
62 }
63 })
64 .sum();
65
66 let score = total_test_score / test_labels.len() as f64;
67
68 println!("test score: {}", score);
69 assert!((0.83 - score).abs() < 1e-2);
70}
examples/20newsgroup_stopwords/main.rs (line 10)
9fn main() {
10 let mut model = Model::new()
11 .with_stop_words_file("examples/data/english-stop-words-large.txt")
12 .with_pseudo_count(0.1);
13
14 let train_data = load_txt("examples/data/20newsgroup_train.txt");
15 let test_data = load_txt("examples/data/20newsgroup_test.txt");
16 let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17 test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19 println!(
20 "Train size: {}, test size: {}",
21 train_data.len(),
22 test_labels.len()
23 );
24
25 model.train("20newsgroup_model", &train_data);
26 println!("Training finished");
27
28 let predicts = model.predict_batch("20newsgroup_model", &test_features);
29 println!("Testing finished");
30
31 let total_test_score: f64 = test_labels
32 .iter()
33 .zip(predicts.iter())
34 .map(|(test_label, predict)| {
35 let (pred_label, _test_score) = predict
36 .iter()
37 .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38 .unwrap();
39 if test_label == pred_label {
40 1.0
41 } else {
42 0.0
43 }
44 })
45 .sum();
46 let score = total_test_score / test_labels.len() as f64;
47
48 println!("test score: {}", score);
49 assert!((0.66 - score).abs() < 1e-3);
50}
examples/spam/main.rs (line 6)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "spam".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Text,
14 name: "email.body".to_owned(),
15 value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "email.domain".to_owned(),
20 value: "evil.com".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "email.n_words".to_owned(),
25 value: "482".to_owned(),
26 },
27 ],
28 ),
29 (
30 "not spam".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Text,
34 name: "email.body".to_owned(),
35 value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "email.domain".to_owned(),
40 value: "gmail.com".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "email.n_words".to_owned(),
45 value: "42".to_owned(),
46 },
47 ],
48 ),
49 ];
50
51 model.train("Spam checker", &input_train);
52
53 // test example 1
54 let result = model.predict(
55 "Spam checker",
56 &vec![
57 Feature {
58 feature_type: FeatureType::Text,
59 name: "email.body".to_owned(),
60 value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61 .to_owned(),
62 },
63 Feature {
64 feature_type: FeatureType::Category,
65 name: "email.domain".to_owned(),
66 value: "example.com".to_owned(),
67 },
68 Feature {
69 feature_type: FeatureType::Gaussian,
70 name: "email.n_words".to_owned(),
71 value: "288".to_owned(),
72 },
73 ],
74 );
75
76 println!("{:?}\n", result);
77 assert!(result.get("spam").unwrap().abs() > 0.9);
78 // result will be:
79 // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81 // test example 2
82 let result = model.predict(
83 "Spam checker",
84 &vec![
85 Feature {
86 feature_type: FeatureType::Text,
87 name: "email.body".to_owned(),
88 value: "Hey bro, hotpot again?".to_owned(),
89 },
90 Feature {
91 feature_type: FeatureType::Category,
92 name: "email.domain".to_owned(),
93 value: "gmail.com".to_owned(),
94 },
95 Feature {
96 feature_type: FeatureType::Gaussian,
97 name: "email.n_words".to_owned(),
98 value: "10".to_owned(),
99 },
100 ],
101 );
102
103 println!("{:?}\n", result);
104 assert!(result.get("not spam").unwrap().abs() > 0.9);
105 // result will be:
106 // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (line 6)
5fn main() {
6 let mut model = Model::new();
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![Feature {
12 feature_type: FeatureType::Category,
13 name: "weather.title".to_owned(),
14 value: "sunny".to_owned(),
15 }],
16 ),
17 (
18 "go play well".to_owned(),
19 vec![Feature {
20 feature_type: FeatureType::Category,
21 name: "weather.title".to_owned(),
22 value: "cloudy".to_owned(),
23 }],
24 ),
25 (
26 "take umbrella".to_owned(),
27 vec![Feature {
28 feature_type: FeatureType::Category,
29 name: "weather.title".to_owned(),
30 value: "rainy".to_owned(),
31 }],
32 ),
33 (
34 "take umbrella".to_owned(),
35 vec![Feature {
36 feature_type: FeatureType::Category,
37 name: "weather.title".to_owned(),
38 value: "rainy".to_owned(),
39 }],
40 ),
41 (
42 "wear more cloth".to_owned(),
43 vec![Feature {
44 feature_type: FeatureType::Category,
45 name: "weather.title".to_owned(),
46 value: "cloudy".to_owned(),
47 }],
48 ),
49 (
50 "wear more cloth".to_owned(),
51 vec![Feature {
52 feature_type: FeatureType::Category,
53 name: "weather.title".to_owned(),
54 value: "sunny".to_owned(),
55 }],
56 ),
57 ];
58 model.train("Mom's word to me before I go out", &input_train);
59
60 // note: the model is trained with only "weather.title" as a category feature, so
61 // the following prediction tests will only take the "weather.title" feature of the input into account
62 // test example 1
63 let result = model.predict(
64 "Mom's word to me before I go out",
65 &vec![
66 Feature {
67 feature_type: FeatureType::Gaussian,
68 name: "weather.degree".to_owned(),
69 value: "0.0".to_owned(),
70 },
71 Feature {
72 feature_type: FeatureType::Category,
73 name: "weather.title".to_owned(),
74 value: "sunny".to_owned(),
75 },
76 Feature {
77 feature_type: FeatureType::Gaussian,
78 name: "weather.wind.level".to_owned(),
79 value: "2".to_owned(),
80 },
81 ],
82 );
83
84 println!("{:?}\n", result);
85 assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86 assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88 // test example 2
89 let result = model.predict(
90 "Mom's word to me before I go out",
91 &vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "22".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "rainy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "5".to_owned(),
106 },
107 ],
108 );
109
110 println!("{:?}\n", result);
111 assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113 // test example 3
114 let result = model.predict(
115 "Mom's word to me before I go out",
116 &vec![
117 Feature {
118 feature_type: FeatureType::Gaussian,
119 name: "weather.degree".to_owned(),
120 value: "25".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Category,
124 name: "weather.title".to_owned(),
125 value: "cloudy".to_owned(),
126 },
127 Feature {
128 feature_type: FeatureType::Gaussian,
129 name: "weather.wind.level".to_owned(),
130 value: "3".to_owned(),
131 },
132 ],
133 );
134
135 println!("{:?}\n", result);
136 assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137 assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (line 6)
5fn main() {
6 let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8 let input_train = vec![
9 (
10 "go play well".to_owned(),
11 vec![
12 Feature {
13 feature_type: FeatureType::Gaussian,
14 name: "weather.degree".to_owned(),
15 value: "32".to_owned(),
16 },
17 Feature {
18 feature_type: FeatureType::Category,
19 name: "weather.title".to_owned(),
20 value: "sunny".to_owned(),
21 },
22 Feature {
23 feature_type: FeatureType::Gaussian,
24 name: "weather.wind.level".to_owned(),
25 value: "3".to_owned(),
26 },
27 ],
28 ),
29 (
30 "go play well".to_owned(),
31 vec![
32 Feature {
33 feature_type: FeatureType::Gaussian,
34 name: "weather.degree".to_owned(),
35 value: "24".to_owned(),
36 },
37 Feature {
38 feature_type: FeatureType::Category,
39 name: "weather.title".to_owned(),
40 value: "cloudy".to_owned(),
41 },
42 Feature {
43 feature_type: FeatureType::Gaussian,
44 name: "weather.wind.level".to_owned(),
45 value: "1".to_owned(),
46 },
47 ],
48 ),
49 (
50 "take umbrella".to_owned(),
51 vec![
52 Feature {
53 feature_type: FeatureType::Gaussian,
54 name: "weather.degree".to_owned(),
55 value: "5".to_owned(),
56 },
57 Feature {
58 feature_type: FeatureType::Category,
59 name: "weather.title".to_owned(),
60 value: "rainy".to_owned(),
61 },
62 Feature {
63 feature_type: FeatureType::Gaussian,
64 name: "weather.wind.level".to_owned(),
65 value: "3".to_owned(),
66 },
67 ],
68 ),
69 (
70 "take umbrella".to_owned(),
71 vec![
72 Feature {
73 feature_type: FeatureType::Gaussian,
74 name: "weather.degree".to_owned(),
75 value: "25".to_owned(),
76 },
77 Feature {
78 feature_type: FeatureType::Category,
79 name: "weather.title".to_owned(),
80 value: "rainy".to_owned(),
81 },
82 Feature {
83 feature_type: FeatureType::Gaussian,
84 name: "weather.wind.level".to_owned(),
85 value: "4".to_owned(),
86 },
87 ],
88 ),
89 (
90 "wear more cloth".to_owned(),
91 vec![
92 Feature {
93 feature_type: FeatureType::Gaussian,
94 name: "weather.degree".to_owned(),
95 value: "-2".to_owned(),
96 },
97 Feature {
98 feature_type: FeatureType::Category,
99 name: "weather.title".to_owned(),
100 value: "cloudy".to_owned(),
101 },
102 Feature {
103 feature_type: FeatureType::Gaussian,
104 name: "weather.wind.level".to_owned(),
105 value: "3".to_owned(),
106 },
107 ],
108 ),
109 (
110 "wear more cloth".to_owned(),
111 vec![
112 Feature {
113 feature_type: FeatureType::Gaussian,
114 name: "weather.degree".to_owned(),
115 value: "2".to_owned(),
116 },
117 Feature {
118 feature_type: FeatureType::Category,
119 name: "weather.title".to_owned(),
120 value: "sunny".to_owned(),
121 },
122 Feature {
123 feature_type: FeatureType::Gaussian,
124 name: "weather.wind.level".to_owned(),
125 value: "3".to_owned(),
126 },
127 ],
128 ),
129 ];
130
131 model.train("Mom's word to me before I go out", &input_train);
132
133 // test example 1
134 let result = model.predict(
135 "Mom's word to me before I go out",
136 &vec![
137 Feature {
138 feature_type: FeatureType::Gaussian,
139 name: "weather.degree".to_owned(),
140 value: "0.0".to_owned(),
141 },
142 Feature {
143 feature_type: FeatureType::Category,
144 name: "weather.title".to_owned(),
145 value: "sunny".to_owned(),
146 },
147 Feature {
148 feature_type: FeatureType::Gaussian,
149 name: "weather.wind.level".to_owned(),
150 value: "2".to_owned(),
151 },
152 ],
153 );
154
155 println!("{:?}\n", result);
156 assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157 // result will be:
158 // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160 // test example 2
161 let result = model.predict(
162 "Mom's word to me before I go out",
163 &vec![
164 Feature {
165 feature_type: FeatureType::Gaussian,
166 name: "weather.degree".to_owned(),
167 value: "28".to_owned(),
168 },
169 Feature {
170 feature_type: FeatureType::Category,
171 name: "weather.title".to_owned(),
172 value: "rainy".to_owned(),
173 },
174 Feature {
175 feature_type: FeatureType::Gaussian,
176 name: "weather.wind.level".to_owned(),
177 value: "5".to_owned(),
178 },
179 ],
180 );
181
182 println!("{:?}\n", result);
183 assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184 // result will be:
185 // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187 // test example 3
188 let result = model.predict(
189 "Mom's word to me before I go out",
190 &vec![
191 Feature {
192 feature_type: FeatureType::Gaussian,
193 name: "weather.degree".to_owned(),
194 value: "25".to_owned(),
195 },
196 Feature {
197 feature_type: FeatureType::Category,
198 name: "weather.title".to_owned(),
199 value: "cloudy".to_owned(),
200 },
201 Feature {
202 feature_type: FeatureType::Gaussian,
203 name: "weather.wind.level".to_owned(),
204 value: "3".to_owned(),
205 },
206 ],
207 );
208
209 println!("{:?}\n", result);
210 assert!(result.get("go play well").unwrap().abs() > 0.5);
211 // result will be:
212 // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
Trait Implementations§
Source§impl<'de, T> Deserialize<'de> for Model<T>
impl<'de, T> Deserialize<'de> for Model<T>
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl<T> !Freeze for Model<T>
impl<T> !RefUnwindSafe for Model<T>
impl<T> Send for Model<T> where
T: Send,
impl<T> Sync for Model<T>
impl<T> Unpin for Model<T> where
T: Unpin,
impl<T> UnwindSafe for Model<T> where
T: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more