/// Generates a seeded "two moons" data set embedded in `dims` dimensions.
///
/// The first `n / 2` rows follow the arc `(cos t, sin t)`; the remaining
/// `n - n / 2` rows follow `(1 - cos t, -sin t - 0.5)`, where `t` is
/// `rng.random::<f64>() * PI`.
///
/// # Parameters
/// * `n` - total number of rows to produce.
/// * `noise_xy` - scale applied to the jitter added to coordinates 0 and 1.
/// * `noise_hd` - scale applied to every coordinate from index 2 upwards.
/// * `dims` - length of each row.
/// * `seed` - seed for the `Pcg64` generator; equal seeds give equal output.
///
/// # Notes
/// * The jitter is a raw `rng.random::<f64>()` draw multiplied by the noise
///   scale, i.e. it is not centred on zero.
/// * When `dims < 2` only the coordinates that fit are stored (the original
///   implementation panicked with an out-of-bounds index in that case). The
///   random draws for the dropped coordinates are still consumed.
pub fn make_moons_hd(
    n: usize,
    noise_xy: f64,
    noise_hd: f64,
    dims: usize,
    seed: u64,
) -> Vec<Vec<f64>> {
    use rand::{Rng, SeedableRng};
    use rand_pcg::Pcg64;
    use std::f64::consts::PI;

    let mut rng = Pcg64::seed_from_u64(seed);
    let n0 = n / 2;
    let n1 = n - n0;

    // Draws one row. The draw order (angle, x jitter, y jitter, then the
    // remaining coordinates) is kept fixed so seeded output stays stable.
    let mut sample = |upper: bool| -> Vec<f64> {
        let t = rng.random::<f64>() * PI;
        let jitter_x = noise_xy * rng.random::<f64>();
        let jitter_y = noise_xy * rng.random::<f64>();
        let (x0, x1) = if upper {
            (t.cos() + jitter_x, t.sin() + jitter_y)
        } else {
            (1.0 - t.cos() + jitter_x, -t.sin() - 0.5 + jitter_y)
        };

        let mut row = Vec::with_capacity(dims);
        // `take(dims)` drops the planar coordinates that do not fit.
        row.extend([x0, x1].iter().copied().take(dims));
        // Empty range when `dims <= 2`.
        row.extend((2..dims).map(|_| noise_hd * rng.random::<f64>()));
        row
    };

    let mut out = Vec::with_capacity(n);
    out.extend((0..n0).map(|_| sample(true)));
    out.extend((0..n1).map(|_| sample(false)));
    out
}
use rand::SeedableRng;
use rand::seq::SliceRandom;
use rand_distr::{Distribution, Normal, Uniform};
pub fn make_gaussian_blob(n_points: usize, noise: f64) -> Vec<Vec<f64>> {
let mut rng = rand::rngs::StdRng::seed_from_u64(789);
let mut rows = Vec::new();
let n_outliers = (n_points as f64 * 0.15).round() as usize;
let n_cluster_points = n_points - n_outliers;
let points_per_cluster = n_cluster_points / 3;
let centers = vec![
vec![0.0; 10], {
let mut c = vec![0.0; 10];
c[0] = 10.0;
c
},
{
let mut c = vec![0.0; 10];
c[1] = 10.0;
c
},
];
for center in ¢ers {
for _ in 0..points_per_cluster {
let mut point = Vec::new();
for &c in center {
let normal = Normal::new(c, noise).unwrap();
point.push(normal.sample(&mut rng));
}
rows.push(point);
}
}
let outlier_dist = Uniform::new(-5.0, 15.0).unwrap();
for _ in 0..n_outliers {
let mut point = Vec::new();
for _ in 0..10 {
point.push(outlier_dist.sample(&mut rng));
}
rows.push(point);
}
rows.shuffle(&mut rng);
rows
}
pub fn make_gaussian_hd(n_points: usize, noise: f64) -> Vec<Vec<f64>> {
let mut rng = rand::rngs::StdRng::seed_from_u64(435);
let mut rows = Vec::with_capacity(n_points);
let n_outliers = (n_points as f64 * 0.15).round() as usize;
let n_cluster_points = n_points - n_outliers;
let base = n_cluster_points / 3;
let rem = n_cluster_points % 3;
let cluster_sizes = [
base + if rem > 0 { 1 } else { 0 },
base + if rem > 1 { 1 } else { 0 },
base,
];
debug_assert_eq!(
cluster_sizes.iter().sum::<usize>(),
n_cluster_points,
"cluster size split must match n_cluster_points"
);
let centers = vec![
vec![0.0; 100],
{
let mut c = vec![0.0; 100];
c[0] = 10.0;
c
},
{
let mut c = vec![0.0; 100];
c[1] = 10.0;
c
},
];
for (cluster_idx, center) in centers.iter().enumerate() {
let n_for_cluster = cluster_sizes[cluster_idx];
for _ in 0..n_for_cluster {
let mut point = Vec::with_capacity(100);
for &c in center {
let normal = Normal::new(c, noise).unwrap();
point.push(normal.sample(&mut rng));
}
rows.push(point);
}
}
let outlier_dist = Uniform::new(-5.0, 15.0).unwrap();
for _ in 0..n_outliers {
let mut point = Vec::with_capacity(100);
for _ in 0..100 {
point.push(outlier_dist.sample(&mut rng));
}
rows.push(point);
}
if rows.len() > n_points {
rows.truncate(n_points);
}
while rows.len() < n_points {
let mut point = Vec::with_capacity(100);
for _ in 0..100 {
point.push(outlier_dist.sample(&mut rng));
}
rows.push(point);
}
rows.shuffle(&mut rng);
rows
}
/// Generates a seeded data set of five clusters for energy tests.
///
/// * `n_items` - number of rows returned.
/// * `n_features` - length of each row.
/// * `seed` - seed for the `Xoshiro256PlusPlus` generator.
///
/// Cluster `k` (0..5) is centered at `k * 10.0` on axis 0 and
/// `(k % 2) * 10.0` on axis 1, with all other axes at zero. Each cluster
/// receives `n_items / 5` rows; every coordinate is the center value plus
/// `(rng.random::<f64>() * 2.0 - 1.0) * 0.8`.
///
/// The `n_items % 5` leftover rows are not attached to any cluster: each of
/// their coordinates is just `rng.random::<f64>() * 2.0 - 1.0`.
///
/// When `n_features < 2` the center offsets that do not fit are skipped (the
/// original implementation panicked with an out-of-bounds index).
pub fn make_energy_test_dataset(n_items: usize, n_features: usize, seed: u64) -> Vec<Vec<f64>> {
    use rand::{Rng, SeedableRng};
    use rand_xoshiro::Xoshiro256PlusPlus;

    const N_CLUSTERS: usize = 5;
    const CLUSTER_SPACING: f64 = 10.0;

    let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed);
    let items_per_cluster = n_items / N_CLUSTERS;
    let mut data = Vec::with_capacity(n_items);

    for cluster_id in 0..N_CLUSTERS {
        let mut center = vec![0.0_f64; n_features];
        let offsets = [
            cluster_id as f64 * CLUSTER_SPACING,
            (cluster_id % 2) as f64 * CLUSTER_SPACING,
        ];
        // `zip` stops at the shorter side, so short rows never index past the end.
        for (slot, offset) in center.iter_mut().zip(offsets.iter()) {
            *slot = *offset;
        }

        for _ in 0..items_per_cluster {
            let item: Vec<f64> = center
                .iter()
                .map(|&c| {
                    let noise: f64 = rng.random::<f64>() * 2.0 - 1.0;
                    c + noise * 0.8
                })
                .collect();
            data.push(item);
        }
    }

    // Leftover rows that do not divide evenly across the clusters.
    let remaining = n_items % N_CLUSTERS;
    for _ in 0..remaining {
        data.push(
            (0..n_features)
                .map(|_| rng.random::<f64>() * 2.0 - 1.0)
                .collect(),
        );
    }
    data
}
pub fn make_gaussian_cliques(
n_per: usize,
noise: f64,
n_out: usize,
dims: usize,
seed: u64,
) -> Vec<Vec<f64>> {
use rand::SeedableRng;
use rand_distr::{Distribution, Normal, Uniform};
use rand_pcg::Pcg64;
let mut rng = Pcg64::seed_from_u64(seed);
let mut rows = Vec::with_capacity(3 * n_per + n_out);
let centers = vec![
{
let mut c = vec![0.0; dims];
c[0] = 10.0;
c
},
{
let mut c = vec![0.0; dims];
c[1] = 10.0;
c
},
{
let mut c = vec![0.0; dims];
c[0] = -10.0;
c[1] = -10.0;
c
},
];
for ctr in ¢ers {
for _ in 0..n_per {
let mut v = Vec::with_capacity(dims);
for &m in ctr {
let d = Normal::new(m, noise).unwrap();
v.push(d.sample(&mut rng));
}
rows.push(v);
}
}
for b in &[(0.5, 0.5), (0.7, 0.3), (0.3, 0.7)] {
let mut v = vec![0.0; dims];
v[0] = 10.0 * b.0 - 10.0 * (1.0 - b.0);
v[1] = 10.0 * b.1 - 10.0 * (1.0 - b.1);
for d in 2..dims {
v[d] = Normal::new(0.0, noise).unwrap().sample(&mut rng);
}
rows.push(v);
}
let uni = Uniform::new(-5.0, 15.0).unwrap();
for _ in 0..n_out {
let mut v = Vec::with_capacity(dims);
for _ in 0..dims {
v.push(uni.sample(&mut rng));
}
rows.push(v);
}
rows
}
/// Generates `n_cliques` Gaussian cliques laid out on a square grid, plus
/// uniform outliers, and returns exactly `n_points` shuffled rows.
///
/// * `n_points` - number of rows returned; 5 % (rounded) are outliers.
/// * `noise` - standard deviation passed to `Normal::new`.
/// * `n_cliques` - number of cliques; the grid side is `ceil(sqrt(n_cliques))`.
/// * `dims` - length of each row.
/// * `seed` - seed for the `StdRng` generator.
///
/// Clique `i` is centered at `(i % grid) * 20.0` on axis 0 and
/// `(i / grid) * 20.0` on axis 1 (where those axes exist); other axes are
/// centered at zero. Cluster rows are split evenly, with the first
/// `n_cluster_points % n_cliques` cliques taking one extra row. Outlier
/// coordinates come from `Uniform::new(-10.0, grid * 20.0 + 10.0)`.
///
/// With `n_cliques == 0` no clique rows are produced and the result is padded
/// with outlier rows up to `n_points` (the original implementation panicked
/// with a division by zero). With `dims == 0` every row is empty (the
/// original indexed `center[0]` unconditionally and panicked).
///
/// # Panics
/// Panics if `Normal::new(_, noise)` rejects `noise`.
pub fn make_gaussian_cliques_multi(
    n_points: usize,
    noise: f64,
    n_cliques: usize,
    dims: usize,
    seed: u64,
) -> Vec<Vec<f64>> {
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    let mut rows = Vec::with_capacity(n_points);

    let n_outliers = (n_points as f64 * 0.05).round() as usize;
    let n_cluster_points = n_points - n_outliers;

    // Guard the split so that zero cliques cannot divide by zero.
    let (base, rem) = match n_cliques {
        0 => (0, 0),
        k => (n_cluster_points / k, n_cluster_points % k),
    };

    let grid_size = (n_cliques as f64).sqrt().ceil() as usize;
    let spacing = 20.0;

    let mut clique_centers = Vec::with_capacity(n_cliques);
    for i in 0..n_cliques {
        let mut center = vec![0.0; dims];
        let grid_x = (i % grid_size) as f64;
        let grid_y = (i / grid_size) as f64;
        // `zip` only writes the axes that exist, covering dims of 0 and 1.
        for (slot, coord) in center
            .iter_mut()
            .zip([grid_x * spacing, grid_y * spacing].iter())
        {
            *slot = *coord;
        }
        clique_centers.push(center);
    }

    for (clique_idx, center) in clique_centers.iter().enumerate() {
        let n_for_clique = base + if clique_idx < rem { 1 } else { 0 };
        for _ in 0..n_for_clique {
            let mut point = Vec::with_capacity(dims);
            for &c in center {
                let normal = Normal::new(c, noise).unwrap();
                point.push(normal.sample(&mut rng));
            }
            rows.push(point);
        }
    }

    let outlier_dist = Uniform::new(-10.0, (grid_size as f64) * spacing + 10.0).unwrap();
    for _ in 0..n_outliers {
        let mut point = Vec::with_capacity(dims);
        for _ in 0..dims {
            point.push(outlier_dist.sample(&mut rng));
        }
        rows.push(point);
    }

    // Reconcile the row count: trim any surplus, then pad with outliers.
    // The padding path is what fills the result when there are no cliques.
    if rows.len() > n_points {
        rows.truncate(n_points);
    }
    while rows.len() < n_points {
        let mut point = Vec::with_capacity(dims);
        for _ in 0..dims {
            point.push(outlier_dist.sample(&mut rng));
        }
        rows.push(point);
    }

    rows.shuffle(&mut rng);
    rows
}
use rand::Rng;
/// Produces an `n` x `f` matrix of seeded random values.
///
/// * `n` - number of rows.
/// * `f` - number of values per row.
/// * `seed` - seed for the `StdRng` generator; equal seeds give equal output.
///
/// Every value is drawn with `rng.random_range(-1.0..1.0)`, row by row.
pub fn generate_test_data(n: usize, f: usize, seed: u64) -> Vec<Vec<f64>> {
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    let mut rng = StdRng::seed_from_u64(seed);
    let mut table: Vec<Vec<f64>> = Vec::with_capacity(n);
    for _ in 0..n {
        let mut row: Vec<f64> = Vec::with_capacity(f);
        for _ in 0..f {
            row.push(rng.random_range(-1.0..1.0));
        }
        table.push(row);
    }
    table
}