1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
//! # Optical Recognition of Handwritten Digits Data Set
//!
//! | Number of Instances | Number of Attributes | Missing Values? | Associated Tasks: |
//! |-|-|-|-|
//! | 1797 | 64 | No | Classification, Clustering |
//!
//! [Digits dataset](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits) contains normalized bitmaps of handwritten digits (0-9) from a preprinted form.
//! This multivariate dataset is frequently used to demonstrate various machine learning algorithms.
//!
//! All input attributes are integers in the range 0..16.
//!
use crate::dataset::deserialize_data;
use crate::dataset::Dataset;

/// Loads the Optical Recognition of Handwritten Digits dataset.
///
/// The samples are decoded from the `digits.xy` file, which is embedded into
/// the compiled crate with `include_bytes!`, so no file access happens at run time.
///
/// The returned [`Dataset`] is populated as follows:
/// * `data`, `target`, `num_samples`, `num_features` are taken as-is from
///   `deserialize_data`;
/// * `feature_names` holds one `pixel_<row>_<col>` label per attribute;
/// * `target_names` holds the ten digit classes, `"0"` through `"9"`.
///
/// # Panics
///
/// Panics if the embedded `digits.xy` payload cannot be deserialized.
pub fn load_dataset() -> Dataset<f32, f32> {
    let (x, y, num_samples, num_features) =
        match deserialize_data(std::include_bytes!("digits.xy")) {
            Err(why) => panic!("Can't deserialize digits.xy. {}", why),
            Ok(parts) => parts,
        };

    // Each sample is a flattened square bitmap; with 64 attributes the side is 8.
    // NOTE(review): labels assume the bitmap is stored row by row (the naming
    // used by scikit-learn's `load_digits`) — confirm against how digits.xy was produced.
    let side = 8;

    Dataset {
        data: x,
        target: y,
        num_samples,
        num_features,
        // Derived from `num_features` so there is always exactly one name per column.
        feature_names: (0..num_features)
            .map(|i| format!("pixel_{}_{}", i / side, i % side))
            .collect(),
        // The classes are the digits 0 through 9.
        target_names: (0..10).map(|digit| digit.to_string()).collect(),
        description: "Digits dataset: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits".to_string(),
    }
}

#[cfg(test)]
mod tests {

    use super::super::*;
    use super::*;

    /// Maintenance helper: writes the loaded dataset back out to `digits.xy`.
    ///
    /// Ignored by default; run it explicitly when the file has to be regenerated.
    #[test]
    #[ignore]
    fn refresh_digits_dataset() {
        let digits = load_dataset();
        let written = serialize_data(&digits, "digits.xy");
        assert!(written.is_ok());
    }

    /// Verifies the dimensions of the loaded data: 1797 samples, 64 features each.
    #[test]
    fn digits_dataset() {
        let digits = load_dataset();

        // Declared dimensions.
        assert_eq!(digits.num_samples, 1797);
        assert_eq!(digits.num_features, 64);

        // Buffer sizes must agree with the declared dimensions.
        assert_eq!(digits.target.len(), 1797);
        assert_eq!(digits.data.len(), 1797 * 64);
    }
}