use crate::dataset::deserialize_data;
use crate::dataset::Dataset;
/// Loads the diabetes dataset bundled with the crate.
///
/// The contents of `diabetes.xy` are included at compile time via
/// `include_bytes!` and decoded with `deserialize_data`. The decoded values are
/// returned in a `Dataset` together with the ten feature names, a single
/// target name and a short description pointing at the data's source page.
///
/// # Panics
///
/// Panics if the embedded `diabetes.xy` bytes cannot be deserialized.
pub fn load_dataset() -> Dataset<f32, f32> {
    // A decoding failure means the bundled file itself is broken, so abort
    // with the underlying error rather than returning a partial dataset.
    let (data, target, num_samples, num_features) =
        deserialize_data(std::include_bytes!("diabetes.xy"))
            .unwrap_or_else(|why| panic!("Can't deserialize diabetes.xy. {}", why));

    let feature_names = ["Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6"]
        .iter()
        .map(|&name| name.to_string())
        .collect();

    let description =
        "Diabetes Data: https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html".to_string();

    Dataset {
        data,
        target,
        num_samples,
        num_features,
        feature_names,
        target_names: vec!["Disease progression".to_string()],
        description,
    }
}
#[cfg(test)]
mod tests {
    use super::super::*;
    use super::*;

    /// Maintenance helper: passes the loaded dataset to `serialize_data` with
    /// the name `diabetes.xy`. Marked `#[ignore]` so it only runs when
    /// explicitly requested (e.g. `cargo test -- --ignored`).
    #[test]
    #[ignore]
    fn refresh_diabetes_dataset() {
        let dataset = load_dataset();
        assert!(serialize_data(&dataset, "diabetes.xy").is_ok());
    }

    /// Checks that the embedded diabetes data has consistent dimensions.
    // Previously named `boston_dataset`, which mislabelled this test in the
    // test runner output; this module loads the diabetes data.
    #[test]
    fn diabetes_dataset() {
        let dataset = load_dataset();
        // The feature buffer must hold exactly samples * features values.
        assert_eq!(
            dataset.data.len(),
            dataset.num_features * dataset.num_samples
        );
        // One target value per sample.
        assert_eq!(dataset.target.len(), dataset.num_samples);
        assert_eq!(dataset.num_features, 10);
        assert_eq!(dataset.num_samples, 442);
    }
}