use crate::dataset::core::Dataset;
/// Typestate marker: the builder has not been given its data yet.
///
/// Zero-sized and only ever used as a type parameter of `DatasetBuilder`;
/// the common traits are derived so the marker never gets in the way of
/// deriving them on types that are parameterised by it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct NoData;
/// Typestate marker: the builder's data has been supplied.
///
/// Zero-sized and only ever used as a type parameter of `DatasetBuilder`;
/// the common traits are derived so the marker never gets in the way of
/// deriving them on types that are parameterised by it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct HasData;
/// Typestate marker: the builder has not been given its target yet.
///
/// Zero-sized and only ever used as a type parameter of `DatasetBuilder`;
/// the common traits are derived so the marker never gets in the way of
/// deriving them on types that are parameterised by it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct NoTarget;
/// Typestate marker: the builder's target has been supplied.
///
/// Zero-sized and only ever used as a type parameter of `DatasetBuilder`;
/// the common traits are derived so the marker never gets in the way of
/// deriving them on types that are parameterised by it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct HasTarget;
/// Typestate builder for `Dataset<X, Y>`.
///
/// `DataState` and `TargetState` are compile-time markers (`NoData` /
/// `HasData` and `NoTarget` / `HasTarget`) recording whether the data and the
/// target have been supplied. `build` is only implemented for the
/// `HasData` + `HasTarget` combination, so an incomplete builder cannot be
/// turned into a dataset.
///
/// Every method consumes the builder and returns the next one, hence the
/// `#[must_use]`: a chain whose result is dropped has no effect.
#[derive(Debug)]
#[must_use = "a builder does nothing unless it is eventually consumed by `build`"]
pub struct DatasetBuilder<X, Y, DataState, TargetState> {
    /// The data value, `None` until `data` has been called.
    data: Option<X>,
    /// The target value, `None` until `target` has been called.
    target: Option<Y>,
    /// Feature names; starts out empty.
    feature_names: Vec<String>,
    /// Optional target names; starts out as `None`.
    target_names: Option<Vec<String>>,
    /// Free-form description; starts out empty.
    description: String,
    /// Carries the `DataState` marker without storing a value of that type.
    _phantom_data: std::marker::PhantomData<DataState>,
    /// Carries the `TargetState` marker without storing a value of that type.
    _phantom_target: std::marker::PhantomData<TargetState>,
}
impl<X, Y> DatasetBuilder<X, Y, NoData, NoTarget> {
pub fn new() -> Self {
Self {
data: None,
target: None,
feature_names: Vec::new(),
target_names: None,
description: String::new(),
_phantom_data: std::marker::PhantomData,
_phantom_target: std::marker::PhantomData,
}
}
}
impl<X, Y, TargetState> DatasetBuilder<X, Y, NoData, TargetState> {
    /// Supplies the data and moves the builder into the `HasData` state.
    ///
    /// Only available while the builder is still `NoData`, so the data can be
    /// provided at most once per builder chain. The target state and all
    /// metadata collected so far are carried over unchanged.
    pub fn data(self, data: X) -> DatasetBuilder<X, Y, HasData, TargetState> {
        // Pull out everything that survives the state transition; the old
        // `data` slot is simply discarded with the rest of `self`.
        let Self {
            target,
            feature_names,
            target_names,
            description,
            _phantom_target,
            ..
        } = self;

        DatasetBuilder {
            data: Some(data),
            target,
            feature_names,
            target_names,
            description,
            _phantom_data: std::marker::PhantomData::<HasData>,
            _phantom_target,
        }
    }
}
impl<X, Y, DataState> DatasetBuilder<X, Y, DataState, NoTarget> {
    /// Supplies the target and moves the builder into the `HasTarget` state.
    ///
    /// Only available while the builder is still `NoTarget`, so the target can
    /// be provided at most once per builder chain. The data state and all
    /// metadata collected so far are carried over unchanged.
    pub fn target(self, target: Y) -> DatasetBuilder<X, Y, DataState, HasTarget> {
        // Pull out everything that survives the state transition; the old
        // `target` slot is simply discarded with the rest of `self`.
        let Self {
            data,
            feature_names,
            target_names,
            description,
            _phantom_data,
            ..
        } = self;

        DatasetBuilder {
            data,
            target: Some(target),
            feature_names,
            target_names,
            description,
            _phantom_data,
            _phantom_target: std::marker::PhantomData::<HasTarget>,
        }
    }
}
impl<X, Y, DataState, TargetState> DatasetBuilder<X, Y, DataState, TargetState> {
    /// Replaces the feature names with `names`.
    ///
    /// Callable in any builder state; the state markers are not affected.
    pub fn feature_names(self, names: Vec<String>) -> Self {
        Self {
            feature_names: names,
            ..self
        }
    }

    /// Sets the target names to `names`, overwriting any previous value.
    ///
    /// Callable in any builder state; the state markers are not affected.
    pub fn target_names(self, names: Vec<String>) -> Self {
        Self {
            target_names: Some(names),
            ..self
        }
    }

    /// Replaces the description with `description` converted into a `String`.
    ///
    /// Callable in any builder state; the state markers are not affected.
    pub fn description<S: Into<String>>(self, description: S) -> Self {
        let description = description.into();
        Self {
            description,
            ..self
        }
    }
}
impl<X, Y> DatasetBuilder<X, Y, HasData, HasTarget> {
    /// Consumes the builder and assembles the final [`Dataset`].
    ///
    /// Only implemented for the `HasData` + `HasTarget` state, so forgetting
    /// either required part is a compile-time error rather than a runtime one.
    ///
    /// # Panics
    ///
    /// Panics if the `data` or `target` slot is empty despite the state
    /// markers. The constructor and transitions in this module only hand out
    /// `HasData` / `HasTarget` after filling the matching slot, so hitting
    /// this would indicate a bug in the builder itself.
    pub fn build(self) -> Dataset<X, Y> {
        // The typestate markers guarantee both slots are filled; the messages
        // name the violated invariant in case that ever stops being true.
        let data = self
            .data
            .expect("builder in `HasData` state must hold data (set by `data()`)");
        let target = self
            .target
            .expect("builder in `HasTarget` state must hold a target (set by `target()`)");

        Dataset {
            data,
            target,
            feature_names: self.feature_names,
            target_names: self.target_names,
            description: self.description,
        }
    }
}
impl<X, Y> Default for DatasetBuilder<X, Y, NoData, NoTarget> {
fn default() -> Self {
Self::new()
}
}
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{Array1, Array2};

    /// Turns a slice of string literals into the owned names the builder takes.
    fn owned_names(labels: &[&str]) -> Vec<String> {
        labels.iter().map(|label| label.to_string()).collect()
    }

    /// Data, target and some metadata end up in the built dataset.
    #[test]
    fn test_builder_pattern() {
        let features = Array2::<f64>::zeros((10, 3));
        let labels = Array1::<f64>::zeros(10);

        let complete = DatasetBuilder::new().data(features).target(labels);
        let dataset = complete
            .description("Test dataset")
            .feature_names(owned_names(&["f1", "f2", "f3"]))
            .build();

        assert_eq!(dataset.description, "Test dataset");
        assert_eq!(dataset.feature_names.len(), 3);
    }

    /// `data` and `target` may be supplied in either order.
    #[test]
    fn test_builder_order_independence() {
        let features = Array2::<f64>::zeros((5, 2));
        let labels = Array1::<f64>::zeros(5);

        let data_first = DatasetBuilder::new()
            .data(features.clone())
            .target(labels.clone())
            .build();
        let target_first = DatasetBuilder::new()
            .target(labels)
            .data(features)
            .build();

        assert_eq!(data_first.data.dim(), target_first.data.dim());
        assert_eq!(data_first.target.len(), target_first.target.len());
    }

    /// Every optional metadata setter is reflected in the built dataset.
    #[test]
    fn test_builder_with_all_metadata() {
        let features = Array2::<f64>::ones((3, 2));
        let labels = Array1::<f64>::ones(3);

        let dataset = DatasetBuilder::new()
            .data(features)
            .target(labels)
            .feature_names(owned_names(&["feature1", "feature2"]))
            .target_names(owned_names(&["class_a", "class_b"]))
            .description("Complete dataset example")
            .build();

        assert_eq!(dataset.feature_names.len(), 2);
        assert!(dataset.target_names.is_some());

        let class_count = dataset
            .target_names
            .as_ref()
            .expect("value should be present")
            .len();
        assert_eq!(class_count, 2);

        assert_eq!(dataset.description, "Complete dataset example");
    }
}