1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
use crate::dataset::deserialize_data;
use crate::dataset::Dataset;
/// Loads the Iris dataset that is embedded in the compiled binary.
///
/// The bytes of `iris.xy` are pulled in at compile time with `include_bytes!`
/// and handed to `deserialize_data`, which yields the feature values, the
/// targets, the number of samples and the number of features. Those are
/// combined with the fixed feature names, class names and a description
/// string into a [`Dataset`].
///
/// # Panics
///
/// Panics with the message `Can't deserialize iris.xy. <error>` when
/// `deserialize_data` reports an error for the embedded bytes.
pub fn load_dataset() -> Dataset<f32, f32> {
    // The file is resolved relative to this source file at compile time.
    let raw = std::include_bytes!("iris.xy");

    let (data, target, num_samples, num_features) =
        deserialize_data(raw).unwrap_or_else(|why| panic!("Can't deserialize iris.xy. {}", why));

    // Column labels, in the order used by the description strings below.
    let feature_names = [
        "sepal length (cm)",
        "sepal width (cm)",
        "petal length (cm)",
        "petal width (cm)",
    ]
    .iter()
    .copied()
    .map(String::from)
    .collect();

    // Human-readable class labels.
    let target_names = ["setosa", "versicolor", "virginica"]
        .iter()
        .copied()
        .map(String::from)
        .collect();

    let description = String::from("Iris dataset: https://archive.ics.uci.edu/ml/datasets/iris");

    Dataset {
        data,
        target,
        num_samples,
        num_features,
        feature_names,
        target_names,
        description,
    }
}
#[cfg(test)]
mod tests {
    use super::super::*;
    use super::*;

    /// Maintenance helper rather than a regular check: it is `#[ignore]`d, so
    /// it only runs when explicitly requested. It passes the loaded dataset to
    /// `serialize_data` with the target name `iris.xy` and expects success.
    // NOTE(review): presumably this regenerates the embedded data file — confirm
    // against `serialize_data`.
    #[test]
    #[ignore]
    fn refresh_iris_dataset() {
        let iris = load_dataset();
        let outcome = serialize_data(&iris, "iris.xy");
        assert!(outcome.is_ok());
    }

    /// Checks the dimensions of the embedded dataset: 50 * 3 samples with
    /// 4 features each.
    #[test]
    fn iris_dataset() {
        let iris = load_dataset();

        // Lengths of the flat value buffer and of the target vector.
        let value_count = iris.data.len();
        let target_count = iris.target.len();

        assert_eq!(value_count, 50 * 3 * 4);
        assert_eq!(target_count, 50 * 3);
        assert_eq!(iris.num_features, 4);
        assert_eq!(iris.num_samples, 50 * 3);
    }
}