use std::sync::OnceLock;
use crate::error::{Result, ScryLearnError};
use crate::matrix::DenseMatrix;
use crate::sparse::CscMatrix;
/// Where a [`Dataset`] keeps its feature values.
#[derive(Clone, Debug, Default)]
pub(crate) enum Storage {
/// Feature values live in `Dataset::features` (this is the default variant).
#[default]
Dense,
/// Feature values live in the wrapped [`CscMatrix`].
Sparse(CscMatrix),
}
/// Descriptive statistics for a single column, as produced by `Dataset::summary`.
///
/// Statistics are computed over the finite values of the column only; when a
/// column has no finite values, `count` is 0 and every `f64` field is NaN.
#[derive(Clone, Debug)]
pub struct ColumnStats {
/// Column name (feature name or target name).
pub name: String,
/// Number of finite values that contributed to the statistics.
pub count: usize,
/// Arithmetic mean of the finite values.
pub mean: f64,
/// Sample standard deviation (divides by `count - 1`); `0.0` when `count <= 1`.
pub std: f64,
/// Smallest finite value.
pub min: f64,
/// 25th percentile (linearly interpolated).
pub q25: f64,
/// 50th percentile (linearly interpolated).
pub median: f64,
/// 75th percentile (linearly interpolated).
pub q75: f64,
/// Largest finite value.
pub max: f64,
}
/// A supervised-learning dataset: feature columns, a target vector and names.
///
/// Features are stored column-major (`features[j][i]` is feature `j` of
/// sample `i`). Sparse datasets keep `features` empty and hold their values in
/// the private `storage` field instead. The private fields are skipped when
/// the `serde` feature is enabled.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct Dataset {
/// Column-major feature values: one inner `Vec` per feature.
pub features: Vec<Vec<f64>>,
/// Target value per sample; its length defines `n_samples()`.
pub target: Vec<f64>,
/// Name of each feature column.
pub feature_names: Vec<String>,
/// Name of the target column.
pub target_name: String,
/// Optional class names; the CSV loader encodes the target as indices into this list.
pub class_labels: Option<Vec<String>>,
// Lazily built dense matrix view of `features`; reset by `invalidate_matrix`.
#[cfg_attr(feature = "serde", serde(skip))]
matrix: OnceLock<DenseMatrix>,
// Cached row-major copy of the features, filled by `flat_feature_matrix`.
#[cfg_attr(feature = "serde", serde(skip))]
row_major_cache: Option<Vec<f64>>,
// Dense vs. sparse backing storage.
#[cfg_attr(feature = "serde", serde(skip))]
storage: Storage,
}
impl Dataset {
/// Builds a dense dataset from column-major feature vectors.
///
/// `features[j]` is the column named `feature_names[j]`. The dataset starts
/// without class labels and with empty matrix / row-major caches.
///
/// # Panics
///
/// Panics when `feature_names` and `features` differ in length, or when any
/// feature column has a different length than column 0. The length of
/// `target` is not checked here.
pub fn new(
    features: Vec<Vec<f64>>,
    target: Vec<f64>,
    feature_names: Vec<String>,
    target_name: impl Into<String>,
) -> Self {
    assert!(
        feature_names.len() == features.len(),
        "feature_names.len()={} but features.len()={}",
        feature_names.len(),
        features.len(),
    );

    // Every column after the first must match the first column's row count.
    if let Some((first, rest)) = features.split_first() {
        let expected_rows = first.len();
        for (offset, col) in rest.iter().enumerate() {
            let i = offset + 1;
            assert!(
                col.len() == expected_rows,
                "feature column {i} has {} rows but column 0 has {}",
                col.len(),
                expected_rows,
            );
        }
    }

    let target_name = target_name.into();
    Self {
        features,
        target,
        feature_names,
        target_name,
        class_labels: None,
        matrix: OnceLock::new(),
        row_major_cache: None,
        storage: Storage::Dense,
    }
}
/// Builds a dense dataset from an existing [`DenseMatrix`].
///
/// The column vectors are copied out of the matrix via `to_col_vecs`, and the
/// matrix itself is stored as the already-initialised matrix cache so that
/// `matrix()` does not need to rebuild it. No shape validation is performed.
pub fn from_matrix(
    matrix: DenseMatrix,
    target: Vec<f64>,
    feature_names: Vec<String>,
    target_name: impl Into<String>,
) -> Self {
    // Extract the columns first: the matrix is moved into the cache below.
    let features = matrix.to_col_vecs();
    let cached_matrix = OnceLock::from(matrix);
    Self {
        features,
        target,
        feature_names,
        target_name: target_name.into(),
        class_labels: None,
        matrix: cached_matrix,
        row_major_cache: None,
        storage: Storage::Dense,
    }
}
/// Returns the dense matrix view of `features`, building it on first use.
///
/// # Panics
///
/// Panics if `DenseMatrix::from_col_major_ref` rejects the current `features`.
#[inline]
pub fn matrix(&self) -> &DenseMatrix {
    let build = || {
        DenseMatrix::from_col_major_ref(&self.features)
            .expect("DenseMatrix build from features failed")
    };
    self.matrix.get_or_init(build)
}
/// Loads a dataset from the CSV file at `path`, using `target_column` as the target.
///
/// # Errors
///
/// Returns `ScryLearnError::Io` if the file cannot be opened, or any error
/// produced by [`Dataset::from_csv_reader`].
#[cfg(feature = "csv")]
pub fn from_csv(path: &str, target_column: &str) -> Result<Self> {
    std::fs::File::open(path)
        .map_err(ScryLearnError::Io)
        .and_then(|file| Self::from_csv_reader(file, target_column))
}
/// Loads a dataset from CSV text supplied by `rdr`.
///
/// The first record is the header row. `target_column` is matched against the
/// headers ignoring ASCII case; every other column becomes a feature. Feature
/// cells that are missing or do not parse as `f64` become NaN. The target is
/// decoded by `parse_target_column`.
///
/// # Errors
///
/// * `ScryLearnError::Csv` if the header or any record cannot be read.
/// * `ScryLearnError::InvalidColumn` if no header matches `target_column`.
/// * `ScryLearnError::EmptyDataset` if there are no data rows.
#[cfg(feature = "csv")]
pub fn from_csv_reader(rdr: impl std::io::Read, target_column: &str) -> Result<Self> {
    fn csv_err(e: csv::Error) -> ScryLearnError {
        ScryLearnError::Csv(e.to_string())
    }

    // `flexible(true)` tolerates records with differing field counts.
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .from_reader(rdr);

    let headers: Vec<String> = reader
        .headers()
        .map_err(csv_err)?
        .iter()
        .map(str::to_owned)
        .collect();

    let Some(target_idx) = headers
        .iter()
        .position(|h| h.eq_ignore_ascii_case(target_column))
    else {
        return Err(ScryLearnError::InvalidColumn(target_column.to_string()));
    };

    // Materialise every record as owned strings; stop at the first read error.
    let rows = reader
        .records()
        .map(|record| {
            record
                .map(|r| r.iter().map(str::to_owned).collect::<Vec<String>>())
                .map_err(csv_err)
        })
        .collect::<Result<Vec<Vec<String>>>>()?;
    if rows.is_empty() {
        return Err(ScryLearnError::EmptyDataset);
    }

    let (target, class_labels) = parse_target_column(&rows, target_idx);

    let feature_indices: Vec<usize> = (0..headers.len())
        .filter(|&i| i != target_idx)
        .collect();
    let feature_names: Vec<String> = feature_indices
        .iter()
        .map(|&col_idx| headers[col_idx].clone())
        .collect();
    // One column per feature; short rows and unparsable cells yield NaN.
    let features: Vec<Vec<f64>> = feature_indices
        .iter()
        .map(|&col_idx| {
            rows.iter()
                .map(|row| {
                    row.get(col_idx)
                        .map_or("", std::string::String::as_str)
                        .parse::<f64>()
                        .unwrap_or(f64::NAN)
                })
                .collect()
        })
        .collect();

    Ok(Self {
        features,
        target,
        feature_names,
        target_name: headers[target_idx].clone(),
        class_labels,
        matrix: OnceLock::new(),
        row_major_cache: None,
        storage: Storage::Dense,
    })
}
#[inline]
pub fn n_samples(&self) -> usize {
self.target.len()
}
/// Number of feature columns.
///
/// For sparse storage this is the column count of the sparse matrix; for
/// dense storage it is the number of entries in `features`.
#[inline]
pub fn n_features(&self) -> usize {
    if let Storage::Sparse(csc) = &self.storage {
        csc.n_cols()
    } else {
        self.features.len()
    }
}
/// Number of classes in the target.
///
/// When class labels are attached this is the number of labels. Otherwise
/// each target value is cast to `i64` with `as` and the distinct results are
/// counted.
pub fn n_classes(&self) -> usize {
    if let Some(labels) = &self.class_labels {
        return labels.len();
    }
    self.target
        .iter()
        .map(|&v| v as i64)
        .collect::<std::collections::BTreeSet<i64>>()
        .len()
}
/// Returns feature column `idx` as a slice.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for `features`.
pub fn feature(&self, idx: usize) -> &[f64] {
    self.features[idx].as_slice()
}
/// Collects sample `idx` by reading position `idx` from every column in `features`.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for any feature column.
pub fn sample(&self, idx: usize) -> Vec<f64> {
    let mut row = Vec::with_capacity(self.features.len());
    for column in &self.features {
        row.push(column[idx]);
    }
    row
}
pub fn feature_matrix(&self) -> Vec<Vec<f64>> {
let n = self.n_samples();
let m = self.n_features();
let mut matrix = vec![vec![0.0; m]; n];
for (j, feat_col) in self.features.iter().enumerate() {
for (i, &val) in feat_col.iter().enumerate() {
matrix[i][j] = val;
}
}
matrix
}
pub fn flat_feature_matrix(&mut self) -> &[f64] {
if self.row_major_cache.is_none() {
let n = self.n_samples();
let m = self.n_features();
let mut buf = vec![0.0; n * m];
if let Some(mat) = self.matrix.get() {
let src = mat.as_slice();
for j in 0..m {
let col_off = j * n;
for i in 0..n {
buf[i * m + j] = src[col_off + i];
}
}
} else {
for j in 0..m {
for i in 0..n {
buf[i * m + j] = self.features[j][i];
}
}
}
self.row_major_cache = Some(buf);
}
self.row_major_cache
.as_ref()
.expect("row_major_cache populated above")
}
/// Returns the slice of `cache` holding sample `idx`, treating `cache` as a
/// row-major buffer with `n_features()` values per row.
///
/// # Panics
///
/// Panics if the requested row lies outside `cache`.
#[inline]
pub fn sample_row<'a>(&self, cache: &'a [f64], idx: usize) -> &'a [f64] {
    let width = self.n_features();
    let start = idx * width;
    let end = start + width;
    &cache[start..end]
}
/// Builds a new dataset containing the samples at `indices`, in that order.
///
/// Names and class labels are cloned; caches start empty. Sparse datasets
/// stay sparse (with empty `features`), dense datasets stay dense.
///
/// # Panics
///
/// Panics if an index is out of bounds for `target` or, for dense storage,
/// for a feature column.
pub fn subset(&self, indices: &[usize]) -> Self {
    let target: Vec<f64> = indices.iter().map(|&i| self.target[i]).collect();

    let (features, storage) = match &self.storage {
        Storage::Sparse(csc) => (Vec::new(), Storage::Sparse(subset_csc(csc, indices))),
        Storage::Dense => {
            let picked: Vec<Vec<f64>> = self
                .features
                .iter()
                .map(|col| indices.iter().map(|&i| col[i]).collect())
                .collect();
            (picked, Storage::Dense)
        }
    };

    Self {
        features,
        target,
        feature_names: self.feature_names.clone(),
        target_name: self.target_name.clone(),
        class_labels: self.class_labels.clone(),
        matrix: OnceLock::new(),
        row_major_cache: None,
        storage,
    }
}
pub fn sync_matrix(&mut self) {
self.matrix = OnceLock::new();
self.row_major_cache = None;
}
#[inline]
pub fn invalidate_matrix(&mut self) {
self.matrix = OnceLock::new();
self.row_major_cache = None;
}
/// Checks that every feature value and every target value is finite.
///
/// Features are scanned column by column (stored entries only for sparse
/// data), then the target is scanned.
///
/// # Errors
///
/// Returns `ScryLearnError::InvalidData` describing the first NaN or
/// infinite value found, including the column name and sample index.
pub fn validate_finite(&self) -> Result<()> {
    // Shared error builder for the dense and sparse feature scans.
    let feature_error = |j: usize, i: usize, v: f64| {
        let name = match self.feature_names.get(j) {
            Some(known) => known.clone(),
            None => format!("feature[{j}]"),
        };
        ScryLearnError::InvalidData(format!(
            "non-finite value ({v}) in {name} at sample {i}"
        ))
    };

    match &self.storage {
        Storage::Sparse(csc) => {
            for j in 0..csc.n_cols() {
                for (i, v) in csc.col(j).iter() {
                    if !v.is_finite() {
                        return Err(feature_error(j, i, v));
                    }
                }
            }
        }
        Storage::Dense => {
            for (j, col) in self.features.iter().enumerate() {
                if let Some(i) = col.iter().position(|x| !x.is_finite()) {
                    return Err(feature_error(j, i, col[i]));
                }
            }
        }
    }

    match self.target.iter().position(|x| !x.is_finite()) {
        Some(i) => {
            let v = self.target[i];
            Err(ScryLearnError::InvalidData(format!(
                "non-finite value ({v}) in target at sample {i}"
            )))
        }
        None => Ok(()),
    }
}
/// Checks that no feature value and no target value is infinite (NaN is allowed).
///
/// Features are scanned column by column (stored entries only for sparse
/// data), then the target is scanned.
///
/// # Errors
///
/// Returns `ScryLearnError::InvalidData` describing the first infinite value
/// found, including the column name and sample index.
pub fn validate_no_inf(&self) -> Result<()> {
    // Shared error builder for the dense and sparse feature scans.
    let feature_error = |j: usize, i: usize, v: f64| {
        let name = match self.feature_names.get(j) {
            Some(known) => known.clone(),
            None => format!("feature[{j}]"),
        };
        ScryLearnError::InvalidData(format!(
            "infinite value ({v}) in {name} at sample {i}"
        ))
    };

    match &self.storage {
        Storage::Sparse(csc) => {
            for j in 0..csc.n_cols() {
                for (i, v) in csc.col(j).iter() {
                    if v.is_infinite() {
                        return Err(feature_error(j, i, v));
                    }
                }
            }
        }
        Storage::Dense => {
            for (j, col) in self.features.iter().enumerate() {
                if let Some(i) = col.iter().position(|x| x.is_infinite()) {
                    return Err(feature_error(j, i, col[i]));
                }
            }
        }
    }

    match self.target.iter().position(|x| x.is_infinite()) {
        Some(i) => {
            let v = self.target[i];
            Err(ScryLearnError::InvalidData(format!(
                "infinite value ({v}) in target at sample {i}"
            )))
        }
        None => Ok(()),
    }
}
pub fn with_class_labels(mut self, labels: Vec<String>) -> Self {
self.class_labels = Some(labels);
self
}
/// Builds a sparse dataset backed by `csc`.
///
/// `features` is left empty; feature values are read from the sparse matrix.
/// No class labels are attached and no shape validation is performed.
pub fn from_sparse(
    csc: CscMatrix,
    target: Vec<f64>,
    feature_names: Vec<String>,
    target_name: impl Into<String>,
) -> Self {
    let storage = Storage::Sparse(csc);
    let target_name = target_name.into();
    Self {
        features: Vec::new(),
        target,
        feature_names,
        target_name,
        class_labels: None,
        matrix: OnceLock::new(),
        row_major_cache: None,
        storage,
    }
}
/// Returns `true` when the dataset is backed by sparse storage.
#[inline]
pub fn is_sparse(&self) -> bool {
    match self.storage {
        Storage::Sparse(_) => true,
        Storage::Dense => false,
    }
}
/// Borrows the underlying sparse matrix, or `None` for dense datasets.
pub fn sparse_csc(&self) -> Option<&CscMatrix> {
    if let Storage::Sparse(csc) = &self.storage {
        Some(csc)
    } else {
        None
    }
}
/// Converts the sparse matrix to a `CsrMatrix` via `to_csr`, or returns
/// `None` for dense datasets. A new matrix is produced on every call.
pub fn sparse_csr(&self) -> Option<crate::sparse::CsrMatrix> {
    let csc = self.sparse_csc()?;
    Some(csc.to_csr())
}
/// Computes descriptive statistics for every feature column followed by the target.
///
/// The result has `n_features() + 1` entries; the last one describes the
/// target. Sparse columns are expanded to dense form (implicit entries are
/// `0.0`) before the statistics are computed. Features without a name are
/// reported as `feature[j]`.
pub fn summary(&self) -> Vec<ColumnStats> {
    let n_feat = self.n_features();
    let sparse = self.sparse_csc();
    let mut stats = Vec::with_capacity(n_feat + 1);

    for j in 0..n_feat {
        let name = match self.feature_names.get(j) {
            Some(known) => known.clone(),
            None => format!("feature[{j}]"),
        };
        let column_stats = match sparse {
            Some(csc) => {
                // Expand the sparse column so zeros participate in the statistics.
                let mut dense = vec![0.0_f64; csc.n_rows()];
                for (row, value) in csc.col(j).iter() {
                    dense[row] = value;
                }
                compute_column_stats(&name, &dense)
            }
            None => compute_column_stats(&name, &self.features[j]),
        };
        stats.push(column_stats);
    }

    stats.push(compute_column_stats(&self.target_name, &self.target));
    stats
}
pub fn describe(&self) {
let stats = self.summary();
if stats.is_empty() {
return;
}
let labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"];
let label_width = labels.iter().map(|l| l.len()).max().unwrap_or(0);
let col_widths: Vec<usize> = stats.iter().map(|s| s.name.len().max(12)).collect();
print!("{:>width$}", "", width = label_width);
for (i, s) in stats.iter().enumerate() {
print!(" {:>width$}", s.name, width = col_widths[i]);
}
println!();
for (row_idx, label) in labels.iter().enumerate() {
print!("{:>width$}", label, width = label_width);
for (i, s) in stats.iter().enumerate() {
let val = match row_idx {
0 => s.count as f64,
1 => s.mean,
2 => s.std,
3 => s.min,
4 => s.q25,
5 => s.median,
6 => s.q75,
7 => s.max,
_ => unreachable!(),
};
print!(" {:>width$.6}", val, width = col_widths[i]);
}
println!();
}
}
/// Fills `features` with a dense copy of the sparse matrix.
///
/// Does nothing for dense datasets. For sparse datasets the columns are
/// expanded (implicit entries become `0.0`), `features` is overwritten and
/// the cached dense matrix is cleared. The storage variant itself is left
/// as sparse.
pub fn ensure_dense(&mut self) {
    let Storage::Sparse(csc) = &self.storage else {
        return;
    };

    let n_rows = csc.n_rows();
    let dense: Vec<Vec<f64>> = (0..csc.n_cols())
        .map(|j| {
            let mut column = vec![0.0; n_rows];
            for (row, value) in csc.col(j).iter() {
                column[row] = value;
            }
            column
        })
        .collect();

    self.features = dense;
    self.matrix = OnceLock::new();
}
}
/// Builds a new sparse matrix whose row `k` is row `indices[k]` of `csc`.
///
/// An index may appear more than once (as in bootstrap sampling); every
/// occurrence receives a copy of the source row. Previously only the last
/// occurrence of a repeated index was filled and the earlier ones were left
/// as all-zero rows.
///
/// The subset is assembled as dense columns and converted back with
/// `CscMatrix::from_dense`. Indices that match no stored entry yield zero rows.
fn subset_csc(csc: &CscMatrix, indices: &[usize]) -> CscMatrix {
    let n_new_rows = indices.len();
    let n_cols = csc.n_cols();

    // Old row index -> every position it occupies in the subset.
    let mut row_map: std::collections::HashMap<usize, Vec<usize>> =
        std::collections::HashMap::with_capacity(n_new_rows);
    for (new_idx, &old_idx) in indices.iter().enumerate() {
        row_map.entry(old_idx).or_default().push(new_idx);
    }

    let mut cols: Vec<Vec<f64>> = vec![vec![0.0; n_new_rows]; n_cols];
    for (j, col) in cols.iter_mut().enumerate() {
        for (old_row, val) in csc.col(j).iter() {
            if let Some(new_rows) = row_map.get(&old_row) {
                for &new_row in new_rows {
                    col[new_row] = val;
                }
            }
        }
    }
    CscMatrix::from_dense(&cols)
}
/// Computes descriptive statistics for one column.
///
/// Non-finite values (NaN, ±infinity) are ignored. When no finite values
/// remain, `count` is 0 and every statistic is NaN. The standard deviation is
/// the sample standard deviation (divides by `count - 1`) and is `0.0` for a
/// single value. Quartiles come from `percentile`.
fn compute_column_stats(name: &str, values: &[f64]) -> ColumnStats {
    let mut finite: Vec<f64> = values.iter().copied().filter(|v| v.is_finite()).collect();
    finite.sort_unstable_by(f64::total_cmp);

    // After sorting, the first and last elements are the extremes; their
    // absence means there were no finite values at all.
    let (Some(&min), Some(&max)) = (finite.first(), finite.last()) else {
        return ColumnStats {
            name: name.to_string(),
            count: 0,
            mean: f64::NAN,
            std: f64::NAN,
            min: f64::NAN,
            q25: f64::NAN,
            median: f64::NAN,
            q75: f64::NAN,
            max: f64::NAN,
        };
    };

    let count = finite.len();
    let mean = finite.iter().sum::<f64>() / count as f64;
    let std = if count > 1 {
        let squared_dev: f64 = finite.iter().map(|v| (v - mean).powi(2)).sum();
        (squared_dev / (count - 1) as f64).sqrt()
    } else {
        0.0
    };

    ColumnStats {
        name: name.to_string(),
        count,
        mean,
        std,
        min,
        q25: percentile(&finite, 0.25),
        median: percentile(&finite, 0.50),
        q75: percentile(&finite, 0.75),
        max,
    }
}
/// Returns the `p`-quantile of an ascending-sorted slice using linear
/// interpolation between the two nearest ranks.
///
/// * `sorted` must already be sorted in ascending order.
/// * `p` is a fraction in `[0, 1]`; values outside that range are clamped.
///
/// Returns NaN for an empty slice instead of panicking.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    let n = sorted.len();
    match n {
        // Guarding here also avoids the `n - 1` underflow below.
        0 => return f64::NAN,
        1 => return sorted[0],
        _ => {}
    }
    // Clamping keeps `lo` inside the slice for out-of-range fractions.
    let idx = p.clamp(0.0, 1.0) * (n - 1) as f64;
    let lo = idx.floor() as usize;
    let hi = lo + 1;
    let frac = idx - lo as f64;
    if hi >= n {
        sorted[lo]
    } else {
        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
    }
}
#[cfg(feature = "csv")]
/// Decodes the target column at `col_idx` from raw CSV rows.
///
/// If every row has a cell at `col_idx` that parses as `f64`, the parsed
/// values are returned with no class labels. Otherwise each distinct cell
/// text becomes a class label (a missing cell counts as the empty string),
/// labels are numbered in order of first appearance, and the target holds
/// each row's label index as `f64`.
///
/// Label lookup uses a hash map, so encoding is linear in the number of rows
/// rather than rows x distinct labels.
fn parse_target_column(rows: &[Vec<String>], col_idx: usize) -> (Vec<f64>, Option<Vec<String>>) {
    // Collecting into `Option` stops at the first cell that is missing or non-numeric.
    let numeric: Option<Vec<f64>> = rows
        .iter()
        .map(|row| row.get(col_idx).and_then(|s| s.parse::<f64>().ok()))
        .collect();
    if let Some(values) = numeric {
        return (values, None);
    }

    let mut labels: Vec<String> = Vec::new();
    let mut label_index: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    let mut encoded = Vec::with_capacity(rows.len());
    for row in rows {
        let val = row.get(col_idx).map_or("", std::string::String::as_str);
        let idx = *label_index.entry(val).or_insert_with(|| {
            labels.push(val.to_string());
            labels.len() - 1
        });
        encoded.push(idx as f64);
    }
    (encoded, Some(labels))
}
// Unit tests for `Dataset`: dense and sparse construction, CSV loading,
// subsetting, matrix caching and summary statistics.
#[cfg(test)]
#[allow(clippy::float_cmp)]
mod tests {
use super::*;
// Dense construction exposes the expected shape, column and row accessors.
#[test]
fn test_dataset_new() {
let features = vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]];
let target = vec![0.0, 1.0, 0.0];
let ds = Dataset::new(features, target, vec!["f1".into(), "f2".into()], "label");
assert_eq!(ds.n_samples(), 3);
assert_eq!(ds.n_features(), 2);
assert_eq!(ds.feature(0), &[1.0, 2.0, 3.0]);
assert_eq!(ds.sample(1), vec![2.0, 5.0]);
}
// A non-numeric target column is label-encoded in first-seen order.
#[cfg(feature = "csv")]
#[test]
fn test_dataset_from_csv_reader() {
let csv = "f1,f2,target\n1.0,4.0,a\n2.0,5.0,b\n3.0,6.0,a\n";
let ds = Dataset::from_csv_reader(csv.as_bytes(), "target").unwrap();
assert_eq!(ds.n_samples(), 3);
assert_eq!(ds.n_features(), 2);
assert_eq!(ds.target, vec![0.0, 1.0, 0.0]);
assert_eq!(
ds.class_labels,
Some(vec!["a".to_string(), "b".to_string()])
);
}
// Dense subsetting keeps the selected rows in the requested order.
#[test]
fn test_dataset_subset() {
let features = vec![vec![1.0, 2.0, 3.0, 4.0], vec![10.0, 20.0, 30.0, 40.0]];
let target = vec![0.0, 1.0, 0.0, 1.0];
let ds = Dataset::new(features, target, vec!["a".into(), "b".into()], "t");
let sub = ds.subset(&[0, 2]);
assert_eq!(sub.n_samples(), 2);
assert_eq!(sub.feature(0), &[1.0, 3.0]);
assert_eq!(sub.target, vec![0.0, 0.0]);
}
// A CSV with only a header row is rejected.
#[cfg(feature = "csv")]
#[test]
fn test_empty_csv() {
let csv = "f1,target\n";
let err = Dataset::from_csv_reader(csv.as_bytes(), "target");
assert!(err.is_err());
}
// Without class labels, classes are counted from distinct target values.
#[test]
fn test_n_classes() {
let ds = Dataset::new(
vec![vec![1.0, 2.0, 3.0]],
vec![0.0, 1.0, 2.0],
vec!["f".into()],
"t",
);
assert_eq!(ds.n_classes(), 3);
}
// The lazily built matrix mirrors the feature columns.
#[test]
fn test_matrix_accessor() {
let features = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let ds = Dataset::new(features, vec![0.0, 1.0], vec!["a".into(), "b".into()], "t");
let mat = ds.matrix();
assert_eq!(mat.n_rows(), 2);
assert_eq!(mat.n_cols(), 2);
assert_eq!(mat.col(0), &[1.0, 2.0]);
assert_eq!(mat.col(1), &[3.0, 4.0]);
}
// Building from a matrix populates both the columns and the matrix view.
#[test]
fn test_from_matrix() {
let mat = DenseMatrix::from_col_major(vec![vec![1.0, 2.0], vec![3.0, 4.0]]).unwrap();
let ds = Dataset::from_matrix(mat, vec![0.0, 1.0], vec!["a".into(), "b".into()], "t");
assert_eq!(ds.n_samples(), 2);
assert_eq!(ds.n_features(), 2);
assert_eq!(ds.feature(0), &[1.0, 2.0]);
assert_eq!(ds.matrix().col(1), &[3.0, 4.0]);
}
// Shared fixture: 3 rows x 2 columns with three non-zero entries.
fn sample_csc() -> CscMatrix {
CscMatrix::from_dense(&[vec![1.0, 0.0, 3.0], vec![0.0, 2.0, 0.0]])
}
// Sparse construction reports sparse storage and the right shape.
#[test]
fn test_from_sparse_basic() {
let csc = sample_csc();
let ds = Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t");
assert!(ds.is_sparse());
assert_eq!(ds.n_samples(), 3);
assert_eq!(ds.n_features(), 2);
}
// The CSC accessor exposes the stored values.
#[test]
fn test_sparse_csc_accessor() {
let csc = sample_csc();
let ds = Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t");
let csc_ref = ds.sparse_csc().expect("should have CSC");
assert_eq!(csc_ref.n_rows(), 3);
assert_eq!(csc_ref.n_cols(), 2);
assert_eq!(csc_ref.get(0, 0), 1.0);
assert_eq!(csc_ref.get(1, 1), 2.0);
assert_eq!(csc_ref.get(1, 0), 0.0);
}
// CSR conversion preserves shape and values.
#[test]
fn test_sparse_csr_conversion() {
let csc = sample_csc();
let ds = Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t");
let csr = ds.sparse_csr().expect("should convert to CSR");
assert_eq!(csr.n_rows(), 3);
assert_eq!(csr.n_cols(), 2);
assert_eq!(csr.get(0, 0), 1.0);
assert_eq!(csr.get(2, 0), 3.0);
assert_eq!(csr.get(1, 1), 2.0);
}
// Subsetting a sparse dataset stays sparse and remaps rows 0 and 2 to 0 and 1.
#[test]
fn test_sparse_subset() {
let csc = sample_csc();
let ds = Dataset::from_sparse(csc, vec![0.0, 1.0, 2.0], vec!["a".into(), "b".into()], "t");
let sub = ds.subset(&[0, 2]);
assert!(sub.is_sparse());
assert_eq!(sub.n_samples(), 2);
assert_eq!(sub.n_features(), 2);
assert_eq!(sub.target, vec![0.0, 2.0]);
let csc_ref = sub.sparse_csc().unwrap();
assert_eq!(csc_ref.get(0, 0), 1.0); assert_eq!(csc_ref.get(1, 0), 3.0); }
// Class labels can be attached to a sparse dataset without changing storage.
#[test]
fn test_sparse_with_class_labels() {
let csc = sample_csc();
let ds = Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t")
.with_class_labels(vec!["cat".into(), "dog".into()]);
assert!(ds.is_sparse());
assert_eq!(
ds.class_labels,
Some(vec!["cat".to_string(), "dog".to_string()])
);
}
// Dense and sparse datasets built from the same data agree on feature count.
#[test]
fn test_n_features_consistency() {
let dense_ds = Dataset::new(
vec![vec![1.0, 0.0, 3.0], vec![0.0, 2.0, 0.0]],
vec![0.0, 1.0, 0.0],
vec!["a".into(), "b".into()],
"t",
);
let csc = sample_csc();
let sparse_ds =
Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t");
assert_eq!(dense_ds.n_features(), sparse_ds.n_features());
}
// `ensure_dense` materialises the sparse columns into `features`.
#[test]
fn test_ensure_dense() {
let csc = sample_csc();
let mut ds =
Dataset::from_sparse(csc, vec![0.0, 1.0, 0.0], vec!["a".into(), "b".into()], "t");
assert!(ds.features.is_empty());
ds.ensure_dense();
assert_eq!(ds.features.len(), 2);
assert_eq!(ds.features[0], vec![1.0, 0.0, 3.0]);
assert_eq!(ds.features[1], vec![0.0, 2.0, 0.0]);
}
// Dense datasets expose no sparse views.
#[test]
fn test_dense_not_sparse() {
let ds = Dataset::new(vec![vec![1.0, 2.0]], vec![0.0, 1.0], vec!["x".into()], "y");
assert!(!ds.is_sparse());
assert!(ds.sparse_csc().is_none());
assert!(ds.sparse_csr().is_none());
}
// After invalidation the matrix is rebuilt with the same contents.
#[test]
fn test_matrix_lazy_rebuild_after_invalidate() {
let features = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let mut ds = Dataset::new(features, vec![0.0, 1.0], vec!["a".into(), "b".into()], "t");
assert_eq!(ds.matrix().col(0), &[1.0, 2.0]);
ds.invalidate_matrix();
assert_eq!(ds.matrix().col(0), &[1.0, 2.0]);
assert_eq!(ds.matrix().col(1), &[3.0, 4.0]);
}
// `summary` yields one entry per feature plus the target; `describe` must not panic.
#[test]
fn test_describe_summary() {
let features = vec![vec![1.0, 2.0, 3.0, 4.0], vec![10.0, 20.0, 30.0, 40.0]];
let target = vec![0.0, 1.0, 0.0, 1.0];
let ds = Dataset::new(features, target, vec!["a".into(), "b".into()], "t");
let stats = ds.summary();
assert_eq!(stats.len(), 3);
assert_eq!(stats[0].name, "a");
assert_eq!(stats[0].count, 4);
assert!((stats[0].mean - 2.5).abs() < 1e-10);
assert!((stats[0].min - 1.0).abs() < 1e-10);
assert!((stats[0].max - 4.0).abs() < 1e-10);
assert_eq!(stats[1].name, "b");
assert_eq!(stats[1].count, 4);
assert!((stats[1].mean - 25.0).abs() < 1e-10);
assert!((stats[1].min - 10.0).abs() < 1e-10);
assert!((stats[1].max - 40.0).abs() < 1e-10);
assert_eq!(stats[2].name, "t");
assert_eq!(stats[2].count, 4);
assert!((stats[2].mean - 0.5).abs() < 1e-10);
ds.describe();
}
// Mutating `features` and invalidating makes the rebuilt matrix reflect the change.
#[test]
fn test_matrix_lazy_rebuild_reflects_feature_mutation() {
let features = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let mut ds = Dataset::new(features, vec![0.0, 1.0], vec!["a".into(), "b".into()], "t");
ds.features[0][0] = 99.0;
ds.invalidate_matrix();
assert_eq!(ds.matrix().col(0), &[99.0, 2.0]);
}
}