Struct Model

Source
pub struct Model<T: ModelStore + Sync> { /* private fields */ }

Implementations

Source

impl<T: ModelStore + Sync> Model<T>

Source

pub fn with_stop_words_file(self, stop_words_file: &str) -> Self

Examples found in repository?
examples/20newsgroup_stopwords/main.rs (line 11)
9fn main() {
10    let mut model = Model::new()
11        .with_stop_words_file("examples/data/english-stop-words-large.txt")
12        .with_pseudo_count(0.1);
13
14    let train_data = load_txt("examples/data/20newsgroup_train.txt");
15    let test_data = load_txt("examples/data/20newsgroup_test.txt");
16    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19    println!(
20        "Train size: {}, test size: {}",
21        train_data.len(),
22        test_labels.len()
23    );
24
25    model.train("20newsgroup_model", &train_data);
26    println!("Training finished");
27
28    let predicts = model.predict_batch("20newsgroup_model", &test_features);
29    println!("Testing finished");
30
31    let total_test_score: f64 = test_labels
32        .iter()
33        .zip(predicts.iter())
34        .map(|(test_label, predict)| {
35            let (pred_label, _test_score) = predict
36                .iter()
37                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38                .unwrap();
39            if test_label == pred_label {
40                1.0
41            } else {
42                0.0
43            }
44        })
45        .sum();
46    let score = total_test_score / test_labels.len() as f64;
47
48    println!("test score: {}", score);
49    assert!((0.66 - score).abs() < 1e-3);
50}
Source

pub fn with_default_gaussian_m2(self, default_gaussian_m2: f64) -> Self

Examples found in repository?
examples/gaussian_mom/main.rs (line 6)
5fn main() {
6    let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Gaussian,
14                    name: "weather.degree".to_owned(),
15                    value: "32".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "weather.title".to_owned(),
20                    value: "sunny".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "weather.wind.level".to_owned(),
25                    value: "3".to_owned(),
26                },
27            ],
28        ),
29        (
30            "go play well".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Gaussian,
34                    name: "weather.degree".to_owned(),
35                    value: "24".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "weather.title".to_owned(),
40                    value: "cloudy".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "weather.wind.level".to_owned(),
45                    value: "1".to_owned(),
46                },
47            ],
48        ),
49        (
50            "take umbrella".to_owned(),
51            vec![
52                Feature {
53                    feature_type: FeatureType::Gaussian,
54                    name: "weather.degree".to_owned(),
55                    value: "5".to_owned(),
56                },
57                Feature {
58                    feature_type: FeatureType::Category,
59                    name: "weather.title".to_owned(),
60                    value: "rainy".to_owned(),
61                },
62                Feature {
63                    feature_type: FeatureType::Gaussian,
64                    name: "weather.wind.level".to_owned(),
65                    value: "3".to_owned(),
66                },
67            ],
68        ),
69        (
70            "take umbrella".to_owned(),
71            vec![
72                Feature {
73                    feature_type: FeatureType::Gaussian,
74                    name: "weather.degree".to_owned(),
75                    value: "25".to_owned(),
76                },
77                Feature {
78                    feature_type: FeatureType::Category,
79                    name: "weather.title".to_owned(),
80                    value: "rainy".to_owned(),
81                },
82                Feature {
83                    feature_type: FeatureType::Gaussian,
84                    name: "weather.wind.level".to_owned(),
85                    value: "4".to_owned(),
86                },
87            ],
88        ),
89        (
90            "wear more cloth".to_owned(),
91            vec![
92                Feature {
93                    feature_type: FeatureType::Gaussian,
94                    name: "weather.degree".to_owned(),
95                    value: "-2".to_owned(),
96                },
97                Feature {
98                    feature_type: FeatureType::Category,
99                    name: "weather.title".to_owned(),
100                    value: "cloudy".to_owned(),
101                },
102                Feature {
103                    feature_type: FeatureType::Gaussian,
104                    name: "weather.wind.level".to_owned(),
105                    value: "3".to_owned(),
106                },
107            ],
108        ),
109        (
110            "wear more cloth".to_owned(),
111            vec![
112                Feature {
113                    feature_type: FeatureType::Gaussian,
114                    name: "weather.degree".to_owned(),
115                    value: "2".to_owned(),
116                },
117                Feature {
118                    feature_type: FeatureType::Category,
119                    name: "weather.title".to_owned(),
120                    value: "sunny".to_owned(),
121                },
122                Feature {
123                    feature_type: FeatureType::Gaussian,
124                    name: "weather.wind.level".to_owned(),
125                    value: "3".to_owned(),
126                },
127            ],
128        ),
129    ];
130
131    model.train("Mom's word to me before I go out", &input_train);
132
133    // test example 1
134    let result = model.predict(
135        "Mom's word to me before I go out",
136        &vec![
137            Feature {
138                feature_type: FeatureType::Gaussian,
139                name: "weather.degree".to_owned(),
140                value: "0.0".to_owned(),
141            },
142            Feature {
143                feature_type: FeatureType::Category,
144                name: "weather.title".to_owned(),
145                value: "sunny".to_owned(),
146            },
147            Feature {
148                feature_type: FeatureType::Gaussian,
149                name: "weather.wind.level".to_owned(),
150                value: "2".to_owned(),
151            },
152        ],
153    );
154
155    println!("{:?}\n", result);
156    assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157    // result will be:
158    // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160    // test example 2
161    let result = model.predict(
162        "Mom's word to me before I go out",
163        &vec![
164            Feature {
165                feature_type: FeatureType::Gaussian,
166                name: "weather.degree".to_owned(),
167                value: "28".to_owned(),
168            },
169            Feature {
170                feature_type: FeatureType::Category,
171                name: "weather.title".to_owned(),
172                value: "rainy".to_owned(),
173            },
174            Feature {
175                feature_type: FeatureType::Gaussian,
176                name: "weather.wind.level".to_owned(),
177                value: "5".to_owned(),
178            },
179        ],
180    );
181
182    println!("{:?}\n", result);
183    assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184    // result will be:
185    // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187    // test example 3
188    let result = model.predict(
189        "Mom's word to me before I go out",
190        &vec![
191            Feature {
192                feature_type: FeatureType::Gaussian,
193                name: "weather.degree".to_owned(),
194                value: "25".to_owned(),
195            },
196            Feature {
197                feature_type: FeatureType::Category,
198                name: "weather.title".to_owned(),
199                value: "cloudy".to_owned(),
200            },
201            Feature {
202                feature_type: FeatureType::Gaussian,
203                name: "weather.wind.level".to_owned(),
204                value: "3".to_owned(),
205            },
206        ],
207    );
208
209    println!("{:?}\n", result);
210    assert!(result.get("go play well").unwrap().abs() > 0.5);
211    // result will be:
212    // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
Source

pub fn with_pseudo_count(self, pseudo_count: f64) -> Self

Examples found in repository?
examples/uci_adult/main.rs (line 29)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
More examples
Hide additional examples
examples/20newsgroup_stopwords/main.rs (line 12)
9fn main() {
10    let mut model = Model::new()
11        .with_stop_words_file("examples/data/english-stop-words-large.txt")
12        .with_pseudo_count(0.1);
13
14    let train_data = load_txt("examples/data/20newsgroup_train.txt");
15    let test_data = load_txt("examples/data/20newsgroup_test.txt");
16    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19    println!(
20        "Train size: {}, test size: {}",
21        train_data.len(),
22        test_labels.len()
23    );
24
25    model.train("20newsgroup_model", &train_data);
26    println!("Training finished");
27
28    let predicts = model.predict_batch("20newsgroup_model", &test_features);
29    println!("Testing finished");
30
31    let total_test_score: f64 = test_labels
32        .iter()
33        .zip(predicts.iter())
34        .map(|(test_label, predict)| {
35            let (pred_label, _test_score) = predict
36                .iter()
37                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38                .unwrap();
39            if test_label == pred_label {
40                1.0
41            } else {
42                0.0
43            }
44        })
45        .sum();
46    let score = total_test_score / test_labels.len() as f64;
47
48    println!("test score: {}", score);
49    assert!((0.66 - score).abs() < 1e-3);
50}
Source

pub fn with_prior_factor(self, prior_factor: f64) -> Self

Examples found in repository?
examples/uci_adult/main.rs (line 28)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
Source

pub fn with_default_gaussian_sigma_factor(self, default_gaussian_sigma_factor: f64) -> Self

Examples found in repository?
examples/uci_adult/main.rs (line 30)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
Source

pub fn train(&mut self, model_name: &str, class_feature_pairs: &[(String, Vec<Feature>)])

Examples found in repository?
examples/20newsgroup/main.rs (line 26)
12fn main() {
13    let mut model = Model::new();
14
15    let train_data = load_txt("examples/data/20newsgroup_train.txt");
16    let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18    let test_data_len = test_data.len();
19
20    println!(
21        "Train size: {}, test size: {}",
22        train_data.len(),
23        test_data.len()
24    );
25
26    model.train("20newsgroup_model", &train_data);
27    println!("Training finished");
28
29    let total_test_score: f64 = test_data
30        .into_par_iter()
31        .map(|(test_label, features)| {
32            let predict = model.predict("20newsgroup_model", &features);
33            let (pred_label, _test_score) = predict
34                .iter()
35                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36                .unwrap();
37            if &test_label == pred_label {
38                1.0
39            } else {
40                0.0
41            }
42        })
43        .sum();
44
45    println!("Testing finished");
46
47    let score = total_test_score / test_data_len as f64;
48
49    println!("test score: {}", score);
50    assert!((0.5771375464684015 - score).abs() < 1e-10);
51    // old master gives       0.5785979819437068
52    // blayze gives           0.5770609318996416
53    // pblayze gives          0.5770609318996416
54    // python                 0.5779341
55}
More examples
Hide additional examples
examples/uci_adult/main.rs (line 43)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
examples/20newsgroup_stopwords/main.rs (line 25)
9fn main() {
10    let mut model = Model::new()
11        .with_stop_words_file("examples/data/english-stop-words-large.txt")
12        .with_pseudo_count(0.1);
13
14    let train_data = load_txt("examples/data/20newsgroup_train.txt");
15    let test_data = load_txt("examples/data/20newsgroup_test.txt");
16    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19    println!(
20        "Train size: {}, test size: {}",
21        train_data.len(),
22        test_labels.len()
23    );
24
25    model.train("20newsgroup_model", &train_data);
26    println!("Training finished");
27
28    let predicts = model.predict_batch("20newsgroup_model", &test_features);
29    println!("Testing finished");
30
31    let total_test_score: f64 = test_labels
32        .iter()
33        .zip(predicts.iter())
34        .map(|(test_label, predict)| {
35            let (pred_label, _test_score) = predict
36                .iter()
37                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38                .unwrap();
39            if test_label == pred_label {
40                1.0
41            } else {
42                0.0
43            }
44        })
45        .sum();
46    let score = total_test_score / test_labels.len() as f64;
47
48    println!("test score: {}", score);
49    assert!((0.66 - score).abs() < 1e-3);
50}
examples/spam/main.rs (line 51)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "spam".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Text,
14                    name: "email.body".to_owned(),
15                    value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "email.domain".to_owned(),
20                    value: "evil.com".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "email.n_words".to_owned(),
25                    value: "482".to_owned(),
26                },
27            ],
28        ),
29        (
30            "not spam".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Text,
34                    name: "email.body".to_owned(),
35                    value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "email.domain".to_owned(),
40                    value: "gmail.com".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "email.n_words".to_owned(),
45                    value: "42".to_owned(),
46                },
47            ],
48        ),
49    ];
50
51    model.train("Spam checker", &input_train);
52
53    // test example 1
54    let result = model.predict(
55        "Spam checker",
56        &vec![
57            Feature {
58                feature_type: FeatureType::Text,
59                name: "email.body".to_owned(),
60                value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61                    .to_owned(),
62            },
63            Feature {
64                feature_type: FeatureType::Category,
65                name: "email.domain".to_owned(),
66                value: "example.com".to_owned(),
67            },
68            Feature {
69                feature_type: FeatureType::Gaussian,
70                name: "email.n_words".to_owned(),
71                value: "288".to_owned(),
72            },
73        ],
74    );
75
76    println!("{:?}\n", result);
77    assert!(result.get("spam").unwrap().abs() > 0.9);
78    // result will be:
79    // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81    // test example 2
82    let result = model.predict(
83        "Spam checker",
84        &vec![
85            Feature {
86                feature_type: FeatureType::Text,
87                name: "email.body".to_owned(),
88                value: "Hey bro, hotpot again?".to_owned(),
89            },
90            Feature {
91                feature_type: FeatureType::Category,
92                name: "email.domain".to_owned(),
93                value: "gmail.com".to_owned(),
94            },
95            Feature {
96                feature_type: FeatureType::Gaussian,
97                name: "email.n_words".to_owned(),
98                value: "10".to_owned(),
99            },
100        ],
101    );
102
103    println!("{:?}\n", result);
104    assert!(result.get("not spam").unwrap().abs() > 0.9);
105    // result will be:
106    // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (line 58)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![Feature {
12                feature_type: FeatureType::Category,
13                name: "weather.title".to_owned(),
14                value: "sunny".to_owned(),
15            }],
16        ),
17        (
18            "go play well".to_owned(),
19            vec![Feature {
20                feature_type: FeatureType::Category,
21                name: "weather.title".to_owned(),
22                value: "cloudy".to_owned(),
23            }],
24        ),
25        (
26            "take umbrella".to_owned(),
27            vec![Feature {
28                feature_type: FeatureType::Category,
29                name: "weather.title".to_owned(),
30                value: "rainy".to_owned(),
31            }],
32        ),
33        (
34            "take umbrella".to_owned(),
35            vec![Feature {
36                feature_type: FeatureType::Category,
37                name: "weather.title".to_owned(),
38                value: "rainy".to_owned(),
39            }],
40        ),
41        (
42            "wear more cloth".to_owned(),
43            vec![Feature {
44                feature_type: FeatureType::Category,
45                name: "weather.title".to_owned(),
46                value: "cloudy".to_owned(),
47            }],
48        ),
49        (
50            "wear more cloth".to_owned(),
51            vec![Feature {
52                feature_type: FeatureType::Category,
53                name: "weather.title".to_owned(),
54                value: "sunny".to_owned(),
55            }],
56        ),
57    ];
58    model.train("Mom's word to me before I go out", &input_train);
59
60    // note: the model is trained only with "weather.title" as a category feature;
61    // the following prediction tests take only the "weather.title" feature of the input into account
62    // test example 1
63    let result = model.predict(
64        "Mom's word to me before I go out",
65        &vec![
66            Feature {
67                feature_type: FeatureType::Gaussian,
68                name: "weather.degree".to_owned(),
69                value: "0.0".to_owned(),
70            },
71            Feature {
72                feature_type: FeatureType::Category,
73                name: "weather.title".to_owned(),
74                value: "sunny".to_owned(),
75            },
76            Feature {
77                feature_type: FeatureType::Gaussian,
78                name: "weather.wind.level".to_owned(),
79                value: "2".to_owned(),
80            },
81        ],
82    );
83
84    println!("{:?}\n", result);
85    assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86    assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88    // test example 2
89    let result = model.predict(
90        "Mom's word to me before I go out",
91        &vec![
92            Feature {
93                feature_type: FeatureType::Gaussian,
94                name: "weather.degree".to_owned(),
95                value: "22".to_owned(),
96            },
97            Feature {
98                feature_type: FeatureType::Category,
99                name: "weather.title".to_owned(),
100                value: "rainy".to_owned(),
101            },
102            Feature {
103                feature_type: FeatureType::Gaussian,
104                name: "weather.wind.level".to_owned(),
105                value: "5".to_owned(),
106            },
107        ],
108    );
109
110    println!("{:?}\n", result);
111    assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113    // test example 3
114    let result = model.predict(
115        "Mom's word to me before I go out",
116        &vec![
117            Feature {
118                feature_type: FeatureType::Gaussian,
119                name: "weather.degree".to_owned(),
120                value: "25".to_owned(),
121            },
122            Feature {
123                feature_type: FeatureType::Category,
124                name: "weather.title".to_owned(),
125                value: "cloudy".to_owned(),
126            },
127            Feature {
128                feature_type: FeatureType::Gaussian,
129                name: "weather.wind.level".to_owned(),
130                value: "3".to_owned(),
131            },
132        ],
133    );
134
135    println!("{:?}\n", result);
136    assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137    assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (line 131)
5fn main() {
6    let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Gaussian,
14                    name: "weather.degree".to_owned(),
15                    value: "32".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "weather.title".to_owned(),
20                    value: "sunny".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "weather.wind.level".to_owned(),
25                    value: "3".to_owned(),
26                },
27            ],
28        ),
29        (
30            "go play well".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Gaussian,
34                    name: "weather.degree".to_owned(),
35                    value: "24".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "weather.title".to_owned(),
40                    value: "cloudy".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "weather.wind.level".to_owned(),
45                    value: "1".to_owned(),
46                },
47            ],
48        ),
49        (
50            "take umbrella".to_owned(),
51            vec![
52                Feature {
53                    feature_type: FeatureType::Gaussian,
54                    name: "weather.degree".to_owned(),
55                    value: "5".to_owned(),
56                },
57                Feature {
58                    feature_type: FeatureType::Category,
59                    name: "weather.title".to_owned(),
60                    value: "rainy".to_owned(),
61                },
62                Feature {
63                    feature_type: FeatureType::Gaussian,
64                    name: "weather.wind.level".to_owned(),
65                    value: "3".to_owned(),
66                },
67            ],
68        ),
69        (
70            "take umbrella".to_owned(),
71            vec![
72                Feature {
73                    feature_type: FeatureType::Gaussian,
74                    name: "weather.degree".to_owned(),
75                    value: "25".to_owned(),
76                },
77                Feature {
78                    feature_type: FeatureType::Category,
79                    name: "weather.title".to_owned(),
80                    value: "rainy".to_owned(),
81                },
82                Feature {
83                    feature_type: FeatureType::Gaussian,
84                    name: "weather.wind.level".to_owned(),
85                    value: "4".to_owned(),
86                },
87            ],
88        ),
89        (
90            "wear more cloth".to_owned(),
91            vec![
92                Feature {
93                    feature_type: FeatureType::Gaussian,
94                    name: "weather.degree".to_owned(),
95                    value: "-2".to_owned(),
96                },
97                Feature {
98                    feature_type: FeatureType::Category,
99                    name: "weather.title".to_owned(),
100                    value: "cloudy".to_owned(),
101                },
102                Feature {
103                    feature_type: FeatureType::Gaussian,
104                    name: "weather.wind.level".to_owned(),
105                    value: "3".to_owned(),
106                },
107            ],
108        ),
109        (
110            "wear more cloth".to_owned(),
111            vec![
112                Feature {
113                    feature_type: FeatureType::Gaussian,
114                    name: "weather.degree".to_owned(),
115                    value: "2".to_owned(),
116                },
117                Feature {
118                    feature_type: FeatureType::Category,
119                    name: "weather.title".to_owned(),
120                    value: "sunny".to_owned(),
121                },
122                Feature {
123                    feature_type: FeatureType::Gaussian,
124                    name: "weather.wind.level".to_owned(),
125                    value: "3".to_owned(),
126                },
127            ],
128        ),
129    ];
130
131    model.train("Mom's word to me before I go out", &input_train);
132
133    // test example 1
134    let result = model.predict(
135        "Mom's word to me before I go out",
136        &vec![
137            Feature {
138                feature_type: FeatureType::Gaussian,
139                name: "weather.degree".to_owned(),
140                value: "0.0".to_owned(),
141            },
142            Feature {
143                feature_type: FeatureType::Category,
144                name: "weather.title".to_owned(),
145                value: "sunny".to_owned(),
146            },
147            Feature {
148                feature_type: FeatureType::Gaussian,
149                name: "weather.wind.level".to_owned(),
150                value: "2".to_owned(),
151            },
152        ],
153    );
154
155    println!("{:?}\n", result);
156    assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157    // result will be:
158    // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160    // test example 2
161    let result = model.predict(
162        "Mom's word to me before I go out",
163        &vec![
164            Feature {
165                feature_type: FeatureType::Gaussian,
166                name: "weather.degree".to_owned(),
167                value: "28".to_owned(),
168            },
169            Feature {
170                feature_type: FeatureType::Category,
171                name: "weather.title".to_owned(),
172                value: "rainy".to_owned(),
173            },
174            Feature {
175                feature_type: FeatureType::Gaussian,
176                name: "weather.wind.level".to_owned(),
177                value: "5".to_owned(),
178            },
179        ],
180    );
181
182    println!("{:?}\n", result);
183    assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184    // result will be:
185    // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187    // test example 3
188    let result = model.predict(
189        "Mom's word to me before I go out",
190        &vec![
191            Feature {
192                feature_type: FeatureType::Gaussian,
193                name: "weather.degree".to_owned(),
194                value: "25".to_owned(),
195            },
196            Feature {
197                feature_type: FeatureType::Category,
198                name: "weather.title".to_owned(),
199                value: "cloudy".to_owned(),
200            },
201            Feature {
202                feature_type: FeatureType::Gaussian,
203                name: "weather.wind.level".to_owned(),
204                value: "3".to_owned(),
205            },
206        ],
207    );
208
209    println!("{:?}\n", result);
210    assert!(result.get("go play well").unwrap().abs() > 0.5);
211    // result will be:
212    // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
Source

pub fn predict(&self, model_name: &str, features: &[Feature]) -> HashMap<String, f64>

Examples found in repository?
examples/20newsgroup/main.rs (line 32)
12fn main() {
13    let mut model = Model::new();
14
15    let train_data = load_txt("examples/data/20newsgroup_train.txt");
16    let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18    let test_data_len = test_data.len();
19
20    println!(
21        "Train size: {}, test size: {}",
22        train_data.len(),
23        test_data.len()
24    );
25
26    model.train("20newsgroup_model", &train_data);
27    println!("Training finished");
28
29    let total_test_score: f64 = test_data
30        .into_par_iter()
31        .map(|(test_label, features)| {
32            let predict = model.predict("20newsgroup_model", &features);
33            let (pred_label, _test_score) = predict
34                .iter()
35                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36                .unwrap();
37            if &test_label == pred_label {
38                1.0
39            } else {
40                0.0
41            }
42        })
43        .sum();
44
45    println!("Testing finished");
46
47    let score = total_test_score / test_data_len as f64;
48
49    println!("test score: {}", score);
50    assert!((0.5771375464684015 - score).abs() < 1e-10);
51    // old master gives       0.5785979819437068
52    // blayze gives           0.5770609318996416
53    // pblayze gives          0.5770609318996416
54    // python                 0.5779341
55}
More examples
Hide additional examples
examples/spam/main.rs (lines 54-74)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "spam".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Text,
14                    name: "email.body".to_owned(),
15                    value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "email.domain".to_owned(),
20                    value: "evil.com".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "email.n_words".to_owned(),
25                    value: "482".to_owned(),
26                },
27            ],
28        ),
29        (
30            "not spam".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Text,
34                    name: "email.body".to_owned(),
35                    value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "email.domain".to_owned(),
40                    value: "gmail.com".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "email.n_words".to_owned(),
45                    value: "42".to_owned(),
46                },
47            ],
48        ),
49    ];
50
51    model.train("Spam checker", &input_train);
52
53    // test example 1
54    let result = model.predict(
55        "Spam checker",
56        &vec![
57            Feature {
58                feature_type: FeatureType::Text,
59                name: "email.body".to_owned(),
60                value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61                    .to_owned(),
62            },
63            Feature {
64                feature_type: FeatureType::Category,
65                name: "email.domain".to_owned(),
66                value: "example.com".to_owned(),
67            },
68            Feature {
69                feature_type: FeatureType::Gaussian,
70                name: "email.n_words".to_owned(),
71                value: "288".to_owned(),
72            },
73        ],
74    );
75
76    println!("{:?}\n", result);
77    assert!(result.get("spam").unwrap().abs() > 0.9);
78    // result will be:
79    // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81    // test example 2
82    let result = model.predict(
83        "Spam checker",
84        &vec![
85            Feature {
86                feature_type: FeatureType::Text,
87                name: "email.body".to_owned(),
88                value: "Hey bro, hotpot again?".to_owned(),
89            },
90            Feature {
91                feature_type: FeatureType::Category,
92                name: "email.domain".to_owned(),
93                value: "gmail.com".to_owned(),
94            },
95            Feature {
96                feature_type: FeatureType::Gaussian,
97                name: "email.n_words".to_owned(),
98                value: "10".to_owned(),
99            },
100        ],
101    );
102
103    println!("{:?}\n", result);
104    assert!(result.get("not spam").unwrap().abs() > 0.9);
105    // result will be:
106    // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (lines 63-82)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![Feature {
12                feature_type: FeatureType::Category,
13                name: "weather.title".to_owned(),
14                value: "sunny".to_owned(),
15            }],
16        ),
17        (
18            "go play well".to_owned(),
19            vec![Feature {
20                feature_type: FeatureType::Category,
21                name: "weather.title".to_owned(),
22                value: "cloudy".to_owned(),
23            }],
24        ),
25        (
26            "take umbrella".to_owned(),
27            vec![Feature {
28                feature_type: FeatureType::Category,
29                name: "weather.title".to_owned(),
30                value: "rainy".to_owned(),
31            }],
32        ),
33        (
34            "take umbrella".to_owned(),
35            vec![Feature {
36                feature_type: FeatureType::Category,
37                name: "weather.title".to_owned(),
38                value: "rainy".to_owned(),
39            }],
40        ),
41        (
42            "wear more cloth".to_owned(),
43            vec![Feature {
44                feature_type: FeatureType::Category,
45                name: "weather.title".to_owned(),
46                value: "cloudy".to_owned(),
47            }],
48        ),
49        (
50            "wear more cloth".to_owned(),
51            vec![Feature {
52                feature_type: FeatureType::Category,
53                name: "weather.title".to_owned(),
54                value: "sunny".to_owned(),
55            }],
56        ),
57    ];
58    model.train("Mom's word to me before I go out", &input_train);
59
60    // note: the model is trained only with "weather.title" as a category feature, so
61    // the following prediction tests will only take the "weather.title" feature of the input into account
62    // test example 1
63    let result = model.predict(
64        "Mom's word to me before I go out",
65        &vec![
66            Feature {
67                feature_type: FeatureType::Gaussian,
68                name: "weather.degree".to_owned(),
69                value: "0.0".to_owned(),
70            },
71            Feature {
72                feature_type: FeatureType::Category,
73                name: "weather.title".to_owned(),
74                value: "sunny".to_owned(),
75            },
76            Feature {
77                feature_type: FeatureType::Gaussian,
78                name: "weather.wind.level".to_owned(),
79                value: "2".to_owned(),
80            },
81        ],
82    );
83
84    println!("{:?}\n", result);
85    assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86    assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88    // test example 2
89    let result = model.predict(
90        "Mom's word to me before I go out",
91        &vec![
92            Feature {
93                feature_type: FeatureType::Gaussian,
94                name: "weather.degree".to_owned(),
95                value: "22".to_owned(),
96            },
97            Feature {
98                feature_type: FeatureType::Category,
99                name: "weather.title".to_owned(),
100                value: "rainy".to_owned(),
101            },
102            Feature {
103                feature_type: FeatureType::Gaussian,
104                name: "weather.wind.level".to_owned(),
105                value: "5".to_owned(),
106            },
107        ],
108    );
109
110    println!("{:?}\n", result);
111    assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113    // test example 3
114    let result = model.predict(
115        "Mom's word to me before I go out",
116        &vec![
117            Feature {
118                feature_type: FeatureType::Gaussian,
119                name: "weather.degree".to_owned(),
120                value: "25".to_owned(),
121            },
122            Feature {
123                feature_type: FeatureType::Category,
124                name: "weather.title".to_owned(),
125                value: "cloudy".to_owned(),
126            },
127            Feature {
128                feature_type: FeatureType::Gaussian,
129                name: "weather.wind.level".to_owned(),
130                value: "3".to_owned(),
131            },
132        ],
133    );
134
135    println!("{:?}\n", result);
136    assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137    assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (lines 134-153)
5fn main() {
6    let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Gaussian,
14                    name: "weather.degree".to_owned(),
15                    value: "32".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "weather.title".to_owned(),
20                    value: "sunny".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "weather.wind.level".to_owned(),
25                    value: "3".to_owned(),
26                },
27            ],
28        ),
29        (
30            "go play well".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Gaussian,
34                    name: "weather.degree".to_owned(),
35                    value: "24".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "weather.title".to_owned(),
40                    value: "cloudy".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "weather.wind.level".to_owned(),
45                    value: "1".to_owned(),
46                },
47            ],
48        ),
49        (
50            "take umbrella".to_owned(),
51            vec![
52                Feature {
53                    feature_type: FeatureType::Gaussian,
54                    name: "weather.degree".to_owned(),
55                    value: "5".to_owned(),
56                },
57                Feature {
58                    feature_type: FeatureType::Category,
59                    name: "weather.title".to_owned(),
60                    value: "rainy".to_owned(),
61                },
62                Feature {
63                    feature_type: FeatureType::Gaussian,
64                    name: "weather.wind.level".to_owned(),
65                    value: "3".to_owned(),
66                },
67            ],
68        ),
69        (
70            "take umbrella".to_owned(),
71            vec![
72                Feature {
73                    feature_type: FeatureType::Gaussian,
74                    name: "weather.degree".to_owned(),
75                    value: "25".to_owned(),
76                },
77                Feature {
78                    feature_type: FeatureType::Category,
79                    name: "weather.title".to_owned(),
80                    value: "rainy".to_owned(),
81                },
82                Feature {
83                    feature_type: FeatureType::Gaussian,
84                    name: "weather.wind.level".to_owned(),
85                    value: "4".to_owned(),
86                },
87            ],
88        ),
89        (
90            "wear more cloth".to_owned(),
91            vec![
92                Feature {
93                    feature_type: FeatureType::Gaussian,
94                    name: "weather.degree".to_owned(),
95                    value: "-2".to_owned(),
96                },
97                Feature {
98                    feature_type: FeatureType::Category,
99                    name: "weather.title".to_owned(),
100                    value: "cloudy".to_owned(),
101                },
102                Feature {
103                    feature_type: FeatureType::Gaussian,
104                    name: "weather.wind.level".to_owned(),
105                    value: "3".to_owned(),
106                },
107            ],
108        ),
109        (
110            "wear more cloth".to_owned(),
111            vec![
112                Feature {
113                    feature_type: FeatureType::Gaussian,
114                    name: "weather.degree".to_owned(),
115                    value: "2".to_owned(),
116                },
117                Feature {
118                    feature_type: FeatureType::Category,
119                    name: "weather.title".to_owned(),
120                    value: "sunny".to_owned(),
121                },
122                Feature {
123                    feature_type: FeatureType::Gaussian,
124                    name: "weather.wind.level".to_owned(),
125                    value: "3".to_owned(),
126                },
127            ],
128        ),
129    ];
130
131    model.train("Mom's word to me before I go out", &input_train);
132
133    // test example 1
134    let result = model.predict(
135        "Mom's word to me before I go out",
136        &vec![
137            Feature {
138                feature_type: FeatureType::Gaussian,
139                name: "weather.degree".to_owned(),
140                value: "0.0".to_owned(),
141            },
142            Feature {
143                feature_type: FeatureType::Category,
144                name: "weather.title".to_owned(),
145                value: "sunny".to_owned(),
146            },
147            Feature {
148                feature_type: FeatureType::Gaussian,
149                name: "weather.wind.level".to_owned(),
150                value: "2".to_owned(),
151            },
152        ],
153    );
154
155    println!("{:?}\n", result);
156    assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157    // result will be:
158    // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160    // test example 2
161    let result = model.predict(
162        "Mom's word to me before I go out",
163        &vec![
164            Feature {
165                feature_type: FeatureType::Gaussian,
166                name: "weather.degree".to_owned(),
167                value: "28".to_owned(),
168            },
169            Feature {
170                feature_type: FeatureType::Category,
171                name: "weather.title".to_owned(),
172                value: "rainy".to_owned(),
173            },
174            Feature {
175                feature_type: FeatureType::Gaussian,
176                name: "weather.wind.level".to_owned(),
177                value: "5".to_owned(),
178            },
179        ],
180    );
181
182    println!("{:?}\n", result);
183    assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184    // result will be:
185    // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187    // test example 3
188    let result = model.predict(
189        "Mom's word to me before I go out",
190        &vec![
191            Feature {
192                feature_type: FeatureType::Gaussian,
193                name: "weather.degree".to_owned(),
194                value: "25".to_owned(),
195            },
196            Feature {
197                feature_type: FeatureType::Category,
198                name: "weather.title".to_owned(),
199                value: "cloudy".to_owned(),
200            },
201            Feature {
202                feature_type: FeatureType::Gaussian,
203                name: "weather.wind.level".to_owned(),
204                value: "3".to_owned(),
205            },
206        ],
207    );
208
209    println!("{:?}\n", result);
210    assert!(result.get("go play well").unwrap().abs() > 0.5);
211    // result will be:
212    // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}
Source

pub fn predict_batch(&self, model_name: &str, features_vec: &[Vec<Feature>]) -> Vec<HashMap<String, f64>>

Examples found in repository?
examples/uci_adult/main.rs (line 46)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
More examples
Hide additional examples
examples/20newsgroup_stopwords/main.rs (line 28)
9fn main() {
10    let mut model = Model::new()
11        .with_stop_words_file("examples/data/english-stop-words-large.txt")
12        .with_pseudo_count(0.1);
13
14    let train_data = load_txt("examples/data/20newsgroup_train.txt");
15    let test_data = load_txt("examples/data/20newsgroup_test.txt");
16    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19    println!(
20        "Train size: {}, test size: {}",
21        train_data.len(),
22        test_labels.len()
23    );
24
25    model.train("20newsgroup_model", &train_data);
26    println!("Training finished");
27
28    let predicts = model.predict_batch("20newsgroup_model", &test_features);
29    println!("Testing finished");
30
31    let total_test_score: f64 = test_labels
32        .iter()
33        .zip(predicts.iter())
34        .map(|(test_label, predict)| {
35            let (pred_label, _test_score) = predict
36                .iter()
37                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38                .unwrap();
39            if test_label == pred_label {
40                1.0
41            } else {
42                0.0
43            }
44        })
45        .sum();
46    let score = total_test_score / test_labels.len() as f64;
47
48    println!("test score: {}", score);
49    assert!((0.66 - score).abs() < 1e-3);
50}
Source§

impl Model<ModelHashMapStore>

Source

pub fn new() -> Model<ModelHashMapStore>

Examples found in repository?
examples/20newsgroup/main.rs (line 13)
12fn main() {
13    let mut model = Model::new();
14
15    let train_data = load_txt("examples/data/20newsgroup_train.txt");
16    let test_data = load_txt("examples/data/20newsgroup_test.txt");
17
18    let test_data_len = test_data.len();
19
20    println!(
21        "Train size: {}, test size: {}",
22        train_data.len(),
23        test_data.len()
24    );
25
26    model.train("20newsgroup_model", &train_data);
27    println!("Training finished");
28
29    let total_test_score: f64 = test_data
30        .into_par_iter()
31        .map(|(test_label, features)| {
32            let predict = model.predict("20newsgroup_model", &features);
33            let (pred_label, _test_score) = predict
34                .iter()
35                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
36                .unwrap();
37            if &test_label == pred_label {
38                1.0
39            } else {
40                0.0
41            }
42        })
43        .sum();
44
45    println!("Testing finished");
46
47    let score = total_test_score / test_data_len as f64;
48
49    println!("test score: {}", score);
50    assert!((0.5771375464684015 - score).abs() < 1e-10);
51    // old master gives       0.5785979819437068
52    // blayze gives           0.5770609318996416
53    // pblayze gives          0.5770609318996416
54    // python                 0.5779341
55}
More examples
Hide additional examples
examples/uci_adult/main.rs (line 27)
26fn main() {
27    let mut model = Model::new()
28        .with_prior_factor(1.0)
29        .with_pseudo_count(0.1)
30        .with_default_gaussian_sigma_factor(0.05);
31
32    let train_data = load_txt("examples/data/adult.data");
33    let test_data = load_txt("examples/data/adult.test");
34    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
35        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
36
37    println!(
38        "Train size: {}, test size: {}",
39        train_data.len(),
40        test_features.len()
41    );
42
43    model.train("uci_adult", &train_data);
44    println!("Training finished");
45
46    let predicts = model.predict_batch("uci_adult", &test_features);
47    println!("Testing finished");
48
49    let total_test_score: f64 = test_labels
50        .iter()
51        .zip(predicts.iter())
52        .map(|(test_label, predict)| {
53            let (pred_label, _test_score) = predict
54                .iter()
55                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
56                .unwrap();
57
58            if test_label == pred_label {
59                1.0
60            } else {
61                0.0
62            }
63        })
64        .sum();
65
66    let score = total_test_score / test_labels.len() as f64;
67
68    println!("test score: {}", score);
69    assert!((0.83 - score).abs() < 1e-2);
70}
examples/20newsgroup_stopwords/main.rs (line 10)
9fn main() {
10    let mut model = Model::new()
11        .with_stop_words_file("examples/data/english-stop-words-large.txt")
12        .with_pseudo_count(0.1);
13
14    let train_data = load_txt("examples/data/20newsgroup_train.txt");
15    let test_data = load_txt("examples/data/20newsgroup_test.txt");
16    let (test_labels, test_features): (Vec<String>, Vec<Vec<Feature>>) =
17        test_data.into_iter().map(|(s, v)| (s, v)).unzip();
18
19    println!(
20        "Train size: {}, test size: {}",
21        train_data.len(),
22        test_labels.len()
23    );
24
25    model.train("20newsgroup_model", &train_data);
26    println!("Training finished");
27
28    let predicts = model.predict_batch("20newsgroup_model", &test_features);
29    println!("Testing finished");
30
31    let total_test_score: f64 = test_labels
32        .iter()
33        .zip(predicts.iter())
34        .map(|(test_label, predict)| {
35            let (pred_label, _test_score) = predict
36                .iter()
37                .max_by(|(_ka, va), (_kb, vb)| va.partial_cmp(vb).unwrap())
38                .unwrap();
39            if test_label == pred_label {
40                1.0
41            } else {
42                0.0
43            }
44        })
45        .sum();
46    let score = total_test_score / test_labels.len() as f64;
47
48    println!("test score: {}", score);
49    assert!((0.66 - score).abs() < 1e-3);
50}
examples/spam/main.rs (line 6)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "spam".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Text,
14                    name: "email.body".to_owned(),
15                    value: "Good day dear beneficiary. This is Secretary to president of Benin republic is writing this email ... heritage, tax, dollars, money, credit card...".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "email.domain".to_owned(),
20                    value: "evil.com".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "email.n_words".to_owned(),
25                    value: "482".to_owned(),
26                },
27            ],
28        ),
29        (
30            "not spam".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Text,
34                    name: "email.body".to_owned(),
35                    value: "Hey bro, how's work these days, wanna join me for hotpot next week?".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "email.domain".to_owned(),
40                    value: "gmail.com".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "email.n_words".to_owned(),
45                    value: "42".to_owned(),
46                },
47            ],
48        ),
49    ];
50
51    model.train("Spam checker", &input_train);
52
53    // test example 1
54    let result = model.predict(
55        "Spam checker",
56        &vec![
57            Feature {
58                feature_type: FeatureType::Text,
59                name: "email.body".to_owned(),
60                value: "Hey bro, This is Secretary to president want to give you some money. Please give me your credit card number ..."
61                    .to_owned(),
62            },
63            Feature {
64                feature_type: FeatureType::Category,
65                name: "email.domain".to_owned(),
66                value: "example.com".to_owned(),
67            },
68            Feature {
69                feature_type: FeatureType::Gaussian,
70                name: "email.n_words".to_owned(),
71                value: "288".to_owned(),
72            },
73        ],
74    );
75
76    println!("{:?}\n", result);
77    assert!(result.get("spam").unwrap().abs() > 0.9);
78    // result will be:
79    // {"not spam": 0.02950007253794831, "spam": 0.9704999274620517}
80
81    // test example 2
82    let result = model.predict(
83        "Spam checker",
84        &vec![
85            Feature {
86                feature_type: FeatureType::Text,
87                name: "email.body".to_owned(),
88                value: "Hey bro, hotpot again?".to_owned(),
89            },
90            Feature {
91                feature_type: FeatureType::Category,
92                name: "email.domain".to_owned(),
93                value: "gmail.com".to_owned(),
94            },
95            Feature {
96                feature_type: FeatureType::Gaussian,
97                name: "email.n_words".to_owned(),
98                value: "10".to_owned(),
99            },
100        ],
101    );
102
103    println!("{:?}\n", result);
104    assert!(result.get("not spam").unwrap().abs() > 0.9);
105    // result will be:
106    // {"not spam": 0.9976790459980525, "spam": 0.002320954001947624}
107}
examples/category_mom/main.rs (line 6)
5fn main() {
6    let mut model = Model::new();
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![Feature {
12                feature_type: FeatureType::Category,
13                name: "weather.title".to_owned(),
14                value: "sunny".to_owned(),
15            }],
16        ),
17        (
18            "go play well".to_owned(),
19            vec![Feature {
20                feature_type: FeatureType::Category,
21                name: "weather.title".to_owned(),
22                value: "cloudy".to_owned(),
23            }],
24        ),
25        (
26            "take umbrella".to_owned(),
27            vec![Feature {
28                feature_type: FeatureType::Category,
29                name: "weather.title".to_owned(),
30                value: "rainy".to_owned(),
31            }],
32        ),
33        (
34            "take umbrella".to_owned(),
35            vec![Feature {
36                feature_type: FeatureType::Category,
37                name: "weather.title".to_owned(),
38                value: "rainy".to_owned(),
39            }],
40        ),
41        (
42            "wear more cloth".to_owned(),
43            vec![Feature {
44                feature_type: FeatureType::Category,
45                name: "weather.title".to_owned(),
46                value: "cloudy".to_owned(),
47            }],
48        ),
49        (
50            "wear more cloth".to_owned(),
51            vec![Feature {
52                feature_type: FeatureType::Category,
53                name: "weather.title".to_owned(),
54                value: "sunny".to_owned(),
55            }],
56        ),
57    ];
58    model.train("Mom's word to me before I go out", &input_train);
59
60    // note: the model is trained only with "weather.title" as a category feature, so
61    // the following prediction tests will only take the "weather.title" feature of the input into account
62    // test example 1
63    let result = model.predict(
64        "Mom's word to me before I go out",
65        &vec![
66            Feature {
67                feature_type: FeatureType::Gaussian,
68                name: "weather.degree".to_owned(),
69                value: "0.0".to_owned(),
70            },
71            Feature {
72                feature_type: FeatureType::Category,
73                name: "weather.title".to_owned(),
74                value: "sunny".to_owned(),
75            },
76            Feature {
77                feature_type: FeatureType::Gaussian,
78                name: "weather.wind.level".to_owned(),
79                value: "2".to_owned(),
80            },
81        ],
82    );
83
84    println!("{:?}\n", result);
85    assert!(result.get("wear more cloth").unwrap().abs() >= 0.4);
86    assert!(result.get("go play well").unwrap().abs() >= 0.4);
87
88    // test example 2
89    let result = model.predict(
90        "Mom's word to me before I go out",
91        &vec![
92            Feature {
93                feature_type: FeatureType::Gaussian,
94                name: "weather.degree".to_owned(),
95                value: "22".to_owned(),
96            },
97            Feature {
98                feature_type: FeatureType::Category,
99                name: "weather.title".to_owned(),
100                value: "rainy".to_owned(),
101            },
102            Feature {
103                feature_type: FeatureType::Gaussian,
104                name: "weather.wind.level".to_owned(),
105                value: "5".to_owned(),
106            },
107        ],
108    );
109
110    println!("{:?}\n", result);
111    assert!(result.get("take umbrella").unwrap().abs() > 0.5);
112
113    // test example 3
114    let result = model.predict(
115        "Mom's word to me before I go out",
116        &vec![
117            Feature {
118                feature_type: FeatureType::Gaussian,
119                name: "weather.degree".to_owned(),
120                value: "25".to_owned(),
121            },
122            Feature {
123                feature_type: FeatureType::Category,
124                name: "weather.title".to_owned(),
125                value: "cloudy".to_owned(),
126            },
127            Feature {
128                feature_type: FeatureType::Gaussian,
129                name: "weather.wind.level".to_owned(),
130                value: "3".to_owned(),
131            },
132        ],
133    );
134
135    println!("{:?}\n", result);
136    assert!(result.get("wear more cloth").unwrap().abs() >= 0.3999);
137    assert!(result.get("go play well").unwrap().abs() >= 0.3999);
138}
examples/gaussian_mom/main.rs (line 6)
5fn main() {
6    let mut model = Model::new().with_default_gaussian_m2(100.0);
7
8    let input_train = vec![
9        (
10            "go play well".to_owned(),
11            vec![
12                Feature {
13                    feature_type: FeatureType::Gaussian,
14                    name: "weather.degree".to_owned(),
15                    value: "32".to_owned(),
16                },
17                Feature {
18                    feature_type: FeatureType::Category,
19                    name: "weather.title".to_owned(),
20                    value: "sunny".to_owned(),
21                },
22                Feature {
23                    feature_type: FeatureType::Gaussian,
24                    name: "weather.wind.level".to_owned(),
25                    value: "3".to_owned(),
26                },
27            ],
28        ),
29        (
30            "go play well".to_owned(),
31            vec![
32                Feature {
33                    feature_type: FeatureType::Gaussian,
34                    name: "weather.degree".to_owned(),
35                    value: "24".to_owned(),
36                },
37                Feature {
38                    feature_type: FeatureType::Category,
39                    name: "weather.title".to_owned(),
40                    value: "cloudy".to_owned(),
41                },
42                Feature {
43                    feature_type: FeatureType::Gaussian,
44                    name: "weather.wind.level".to_owned(),
45                    value: "1".to_owned(),
46                },
47            ],
48        ),
49        (
50            "take umbrella".to_owned(),
51            vec![
52                Feature {
53                    feature_type: FeatureType::Gaussian,
54                    name: "weather.degree".to_owned(),
55                    value: "5".to_owned(),
56                },
57                Feature {
58                    feature_type: FeatureType::Category,
59                    name: "weather.title".to_owned(),
60                    value: "rainy".to_owned(),
61                },
62                Feature {
63                    feature_type: FeatureType::Gaussian,
64                    name: "weather.wind.level".to_owned(),
65                    value: "3".to_owned(),
66                },
67            ],
68        ),
69        (
70            "take umbrella".to_owned(),
71            vec![
72                Feature {
73                    feature_type: FeatureType::Gaussian,
74                    name: "weather.degree".to_owned(),
75                    value: "25".to_owned(),
76                },
77                Feature {
78                    feature_type: FeatureType::Category,
79                    name: "weather.title".to_owned(),
80                    value: "rainy".to_owned(),
81                },
82                Feature {
83                    feature_type: FeatureType::Gaussian,
84                    name: "weather.wind.level".to_owned(),
85                    value: "4".to_owned(),
86                },
87            ],
88        ),
89        (
90            "wear more cloth".to_owned(),
91            vec![
92                Feature {
93                    feature_type: FeatureType::Gaussian,
94                    name: "weather.degree".to_owned(),
95                    value: "-2".to_owned(),
96                },
97                Feature {
98                    feature_type: FeatureType::Category,
99                    name: "weather.title".to_owned(),
100                    value: "cloudy".to_owned(),
101                },
102                Feature {
103                    feature_type: FeatureType::Gaussian,
104                    name: "weather.wind.level".to_owned(),
105                    value: "3".to_owned(),
106                },
107            ],
108        ),
109        (
110            "wear more cloth".to_owned(),
111            vec![
112                Feature {
113                    feature_type: FeatureType::Gaussian,
114                    name: "weather.degree".to_owned(),
115                    value: "2".to_owned(),
116                },
117                Feature {
118                    feature_type: FeatureType::Category,
119                    name: "weather.title".to_owned(),
120                    value: "sunny".to_owned(),
121                },
122                Feature {
123                    feature_type: FeatureType::Gaussian,
124                    name: "weather.wind.level".to_owned(),
125                    value: "3".to_owned(),
126                },
127            ],
128        ),
129    ];
130
131    model.train("Mom's word to me before I go out", &input_train);
132
133    // test example 1
134    let result = model.predict(
135        "Mom's word to me before I go out",
136        &vec![
137            Feature {
138                feature_type: FeatureType::Gaussian,
139                name: "weather.degree".to_owned(),
140                value: "0.0".to_owned(),
141            },
142            Feature {
143                feature_type: FeatureType::Category,
144                name: "weather.title".to_owned(),
145                value: "sunny".to_owned(),
146            },
147            Feature {
148                feature_type: FeatureType::Gaussian,
149                name: "weather.wind.level".to_owned(),
150                value: "2".to_owned(),
151            },
152        ],
153    );
154
155    println!("{:?}\n", result);
156    assert!(result.get("wear more cloth").unwrap().abs() > 0.7);
157    // result will be:
158    // {"wear more cloth": 0.8145285759525068, "go play well": 0.1310511820033621, "take umbrella": 0.05442024204413106}
159
160    // test example 2
161    let result = model.predict(
162        "Mom's word to me before I go out",
163        &vec![
164            Feature {
165                feature_type: FeatureType::Gaussian,
166                name: "weather.degree".to_owned(),
167                value: "28".to_owned(),
168            },
169            Feature {
170                feature_type: FeatureType::Category,
171                name: "weather.title".to_owned(),
172                value: "rainy".to_owned(),
173            },
174            Feature {
175                feature_type: FeatureType::Gaussian,
176                name: "weather.wind.level".to_owned(),
177                value: "5".to_owned(),
178            },
179        ],
180    );
181
182    println!("{:?}\n", result);
183    assert!(result.get("take umbrella").unwrap().abs() > 0.6);
184    // result will be:
185    // {"wear more cloth": 0.040777064361781155, "take umbrella": 0.6929647650603867, "go play well": 0.2662581705778321}
186
187    // test example 3
188    let result = model.predict(
189        "Mom's word to me before I go out",
190        &vec![
191            Feature {
192                feature_type: FeatureType::Gaussian,
193                name: "weather.degree".to_owned(),
194                value: "25".to_owned(),
195            },
196            Feature {
197                feature_type: FeatureType::Category,
198                name: "weather.title".to_owned(),
199                value: "cloudy".to_owned(),
200            },
201            Feature {
202                feature_type: FeatureType::Gaussian,
203                name: "weather.wind.level".to_owned(),
204                value: "3".to_owned(),
205            },
206        ],
207    );
208
209    println!("{:?}\n", result);
210    assert!(result.get("go play well").unwrap().abs() > 0.5);
211    // result will be:
212    // {"go play well": 0.6267604626518958, "wear more cloth": 0.14149599917558417, "take umbrella": 0.23174353817252016}
213}

Trait Implementations§

Source§

impl<'de, T> Deserialize<'de> for Model<T>
where T: Deserialize<'de> + ModelStore + Sync,

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl<T> Serialize for Model<T>
where T: Serialize + ModelStore + Sync,

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

§

impl<T> !Freeze for Model<T>

§

impl<T> !RefUnwindSafe for Model<T>

§

impl<T> Send for Model<T>
where T: Send,

§

impl<T> Sync for Model<T>

§

impl<T> Unpin for Model<T>
where T: Unpin,

§

impl<T> UnwindSafe for Model<T>
where T: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,