use linfa::Dataset;
use ndarray::{s, Array, Array2, ArrayBase, Data, Ix1, Ix2};
use ndarray_rand::{
rand::Rng,
rand_distr::{Distribution, StandardNormal},
RandomExt,
};
/// Generates clusters ("blobs") of points scattered around the given centroids,
/// using [`StandardNormal`] noise around each centroid.
///
/// This is a convenience wrapper around [`blobs_with_distribution`].
///
/// # Arguments
///
/// * `blob_size` - number of points generated for each centroid.
/// * `blob_centroids` - 2-D array with one centroid per row.
/// * `rng` - random number generator used to draw the noise.
///
/// # Returns
///
/// Whatever [`blobs_with_distribution`] returns for the same arguments: an
/// array with `blob_size` rows per centroid and as many columns as
/// `blob_centroids`.
pub fn blobs(
    blob_size: usize,
    blob_centroids: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    rng: &mut impl Rng,
) -> Array2<f64> {
    let noise = StandardNormal;
    blobs_with_distribution(blob_size, blob_centroids, noise, rng)
}
/// Generates clusters ("blobs") of points around the given centroids, with
/// per-coordinate noise drawn from `distribution`.
///
/// # Arguments
///
/// * `blob_size` - number of points generated for each centroid.
/// * `blob_centroids` - 2-D array of shape `(n_centroids, n_features)`, one
///   centroid per row.
/// * `distribution` - distribution of the offsets added to each centroid; it
///   is cloned once per blob.
/// * `rng` - random number generator, shared across all blobs.
///
/// # Returns
///
/// An array of shape `(n_centroids * blob_size, n_features)`. The points that
/// belong to the `i`-th centroid occupy rows
/// `i * blob_size..(i + 1) * blob_size`.
pub fn blobs_with_distribution(
    blob_size: usize,
    blob_centroids: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    distribution: impl Distribution<f64> + Clone,
    rng: &mut impl Rng,
) -> Array2<f64> {
    let (n_centroids, n_features) = blob_centroids.dim();
    let mut blobs = Array2::<f64>::zeros((n_centroids * blob_size, n_features));

    // Running offset of the first row that belongs to the current blob.
    let mut start = 0;
    for centroid in blob_centroids.rows() {
        let end = start + blob_size;
        let blob = make_blob(blob_size, &centroid, distribution.clone(), rng);
        blobs.slice_mut(s![start..end, ..]).assign(&blob);
        start = end;
    }

    blobs
}
fn make_blob(
blob_size: usize,
blob_centroid: &ArrayBase<impl Data<Elem = f64>, Ix1>,
distribution: impl Distribution<f64>,
rng: &mut impl Rng,
) -> Array2<f64> {
let shape = (blob_size, blob_centroid.len());
let origin_blob: Array2<f64> = Array::random_using(shape, distribution, rng);
origin_blob + blob_centroid
}
pub fn make_dataset<X, Y>(
num_rows: usize,
num_feats: usize,
num_targets: usize,
feat_distr: X,
target_distr: Y,
) -> Dataset<f64, f64>
where
X: Distribution<f64>,
Y: Distribution<f64>,
{
let features = Array::random((num_rows, num_feats), feat_distr);
let targets = Array::random((num_rows, num_targets), target_distr);
Dataset::new(features, targets)
}