pub mod red_wine_quality;
pub mod white_wine_quality;
use crate::DatasetError;
use csv::ReaderBuilder;
use ndarray::{Array1, Array2};
fn parse_wine_data_to_array<R: std::io::Read>(
dataset_name: &str,
reader: R,
) -> Result<(Array2<f64>, Array1<f64>), DatasetError> {
let mut rdr = ReaderBuilder::new()
.delimiter(b';')
.has_headers(true)
.from_reader(reader);
let mut features_array = Vec::new();
let mut target_array = Vec::new();
let mut num_features: Option<usize> = None;
for (idx, result) in rdr.records().enumerate() {
let record = result.map_err(|e| DatasetError::csv_read_error(dataset_name, e))?;
let line_num = idx + 2;
if num_features.is_none() {
if record.len() < 2 {
return Err(DatasetError::invalid_column_count(
dataset_name,
2,
record.len(),
line_num,
&format!("{:?}", record),
));
}
num_features = Some(record.len() - 1);
}
let n_features = num_features.unwrap();
if record.len() != n_features + 1 {
return Err(DatasetError::invalid_column_count(
dataset_name,
n_features + 1,
record.len(),
line_num,
&format!("{:?}", record),
));
}
for i in 0..n_features {
let field = format!("feature[{i}]");
features_array.push(record[i].parse::<f64>().map_err(|e| {
DatasetError::parse_failed(
dataset_name,
&field,
line_num,
&format!("{:?}", record),
e,
)
})?);
}
target_array.push(record[n_features].parse::<f64>().map_err(|e| {
DatasetError::parse_failed(
dataset_name,
"target",
line_num,
&format!("{:?}", record),
e,
)
})?);
}
let n_samples = target_array.len();
if n_samples == 0 {
return Err(DatasetError::empty_dataset(dataset_name));
}
let n_features = num_features.unwrap();
let features_array = Array2::from_shape_vec((n_samples, n_features), features_array)
.map_err(|e| DatasetError::array_shape_error(dataset_name, "features", e))?;
let target_array = Array1::from_vec(target_array);
Ok((features_array, target_array))
}