1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
use ndarray::{Array2, ArrayBase, Axis, Data, Ix1, Ix2};
use ndarray_rand::rand::Rng;
use ndarray_rand::rand_distr::StandardNormal;
use ndarray_rand::RandomExt;
use num_traits::float::FloatConst;

/// Computes the pairwise similarity matrix of `observations` with a Gaussian kernel.
///
/// Every row of `observations` is treated as one observation. The result is a dense,
/// symmetric `N x N` matrix (`N` being the number of rows) whose entry `(i, j)` is
/// `exp(-d(i, j) / eps)`, where `d(i, j)` is the squared Euclidean distance between
/// rows `i` and `j`.
///
/// `eps` is the scaling parameter of the kernel. It is not validated: with
/// `eps == 0.0` the diagonal entries evaluate `0 / 0` and therefore become `NaN`.
pub fn to_gaussian_similarity(
    observations: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    eps: f64,
) -> Array2<f64> {
    let n_observations = observations.len_of(Axis(0));
    // Every entry is written below, so the initial contents are irrelevant.
    let mut similarity = Array2::zeros((n_observations, n_observations));

    for i in 0..n_observations {
        let a = observations.row(i);

        // The kernel is symmetric in its two arguments, so each unordered pair is
        // evaluated once (`j >= i`) and mirrored into the lower half.
        for j in i..n_observations {
            let b = observations.row(j);

            let distance = a
                .iter()
                .zip(b.iter())
                .map(|(x, y)| (x - y).powf(2.0))
                .sum::<f64>();

            let value = (-distance / eps).exp();
            similarity[(i, j)] = value;
            similarity[(j, i)] = value;
        }
    }

    similarity
}
/// Generates a three-dimensional swiss roll made of `n_points` points.
///
/// Every row of the returned `(n_points, 3)` array is one point `(x, y, z)`:
///
/// * `x` and `y` lie on a spiral winding around the origin,
///   `(x, y) = speed * phi * (cos(phi), sin(phi))`, with `phi` drawn from `[0, 10)`;
///   `speed` controls how fast the spiral moves outwards.
/// * `z` is drawn from `[0, height)`, so the roll starts at `z = 0` rather than
///   being centered along the z axis.
///
/// # Panics
///
/// NOTE(review): the two-argument `gen_range` used here is expected to panic on an
/// empty range, i.e. when `height <= 0.0` and `n_points > 0` — confirm against the
/// pinned `rand` version.
pub fn generate_swissroll(
    height: f64,
    speed: f64,
    n_points: usize,
    rng: &mut impl Rng,
) -> Array2<f64> {
    let mut roll: Array2<f64> = Array2::zeros((n_points, 3));

    for mut point in roll.outer_iter_mut() {
        // Draw order (z first, then phi) is kept stable so that a seeded generator
        // keeps producing the same dataset.
        let z: f64 = rng.gen_range(0.0, height);
        let phi: f64 = rng.gen_range(0.0, 10.0);

        // The distance from the z axis grows linearly with the angle.
        let radius = speed * phi;

        point[0] = radius * phi.cos();
        point[1] = radius * phi.sin();
        point[2] = z;
    }

    roll
}

/// Generates points on nested spherical shells ("rings") in three dimensions.
///
/// Each `(start, end)` entry of `rings` describes one shell. For every point of that
/// shell a radius is drawn from `[start, end)` and two angles `phi` and `theta` are
/// drawn from `[0, 2π)`; the triple is then converted from spherical to Cartesian
/// coordinates.
///
/// `n_points` is the total number of requested points. It is split evenly between
/// the rings, rounding up, so the returned array has
/// `ceil(n_points / rings.len()) * rings.len()` rows and 3 columns. The points of
/// ring `k` occupy one contiguous run of rows, in the order of `rings`.
///
/// An empty `rings` slice yields an empty `(0, 3)` array.
///
/// # Panics
///
/// NOTE(review): the two-argument `gen_range` used here is expected to panic when a
/// ring has `start >= end` — confirm against the pinned `rand` version.
pub fn generate_convoluted_rings(
    rings: &[(f64, f64)],
    n_points: usize,
    rng: &mut impl Rng,
) -> Array2<f64> {
    if rings.is_empty() {
        return Array2::zeros((0, 3));
    }

    // Exact integer ceiling division: a round trip through `f32` silently loses
    // precision once the point count exceeds 2^24.
    let n_points = n_points / rings.len() + usize::from(n_points % rings.len() != 0);
    let mut array = Array2::zeros((n_points * rings.len(), 3));
    let full_turn = f64::PI() * 2.0;

    for (n, (start, end)) in rings.iter().enumerate() {
        // Rows belonging to ring `n` start here.
        let first_row = n * n_points;

        for i in 0..n_points {
            let r: f64 = rng.gen_range(start, end);
            let phi: f64 = rng.gen_range(0.0, full_turn);
            let theta: f64 = rng.gen_range(0.0, full_turn);

            // Spherical -> Cartesian conversion.
            let x = theta.sin() * phi.cos() * r;
            let y = theta.sin() * phi.sin() * r;
            let z = theta.cos() * r;

            let row = first_row + i;
            array[(row, 0)] = x;
            array[(row, 1)] = y;
            array[(row, 2)] = z;
        }
    }

    array
}

/// Generates points on nested annuli ("rings") in two dimensions.
///
/// Each `(start, end)` entry of `rings` describes one annulus. For every point of
/// that annulus a radius is drawn from `[start, end)` and an angle `phi` is drawn
/// from `[0, 2π)`; the pair is then converted from polar to Cartesian coordinates.
///
/// `n_points` is the total number of requested points. It is split evenly between
/// the rings, rounding up, so the returned array has
/// `ceil(n_points / rings.len()) * rings.len()` rows and 2 columns. The points of
/// ring `k` occupy one contiguous run of rows, in the order of `rings`.
///
/// An empty `rings` slice yields an empty `(0, 2)` array.
///
/// # Panics
///
/// NOTE(review): the two-argument `gen_range` used here is expected to panic when a
/// ring has `start >= end` — confirm against the pinned `rand` version.
pub fn generate_convoluted_rings2d(
    rings: &[(f64, f64)],
    n_points: usize,
    rng: &mut impl Rng,
) -> Array2<f64> {
    if rings.is_empty() {
        return Array2::zeros((0, 2));
    }

    // Exact integer ceiling division: a round trip through `f32` silently loses
    // precision once the point count exceeds 2^24.
    let n_points = n_points / rings.len() + usize::from(n_points % rings.len() != 0);
    let mut array = Array2::zeros((n_points * rings.len(), 2));
    let full_turn = f64::PI() * 2.0;

    for (n, (start, end)) in rings.iter().enumerate() {
        // Rows belonging to ring `n` start here.
        let first_row = n * n_points;

        for i in 0..n_points {
            let r: f64 = rng.gen_range(start, end);
            let phi: f64 = rng.gen_range(0.0, full_turn);

            // Polar -> Cartesian conversion.
            let x = phi.cos() * r;
            let y = phi.sin() * r;

            let row = first_row + i;
            array[(row, 0)] = x;
            array[(row, 1)] = y;
        }
    }

    array
}
/// Builds a synthetic clustering dataset with one "blob" of points per centroid.
///
/// `blob_centroids` has shape `(n_blobs, n_features)`. For each of its rows,
/// `blob_size` points are generated with [`generate_blob`], i.e. sampled from a
/// normal distribution with unit variance centered on that row.
///
/// The blobs are stacked in centroid order: rows
/// `k * blob_size..(k + 1) * blob_size` of the returned
/// `(n_blobs * blob_size, n_features)` array belong to centroid `k`.
///
/// Handy for quickly assembling a best-case input to test or benchmark clustering
/// algorithms.
pub fn generate_blobs(
    blob_size: usize,
    blob_centroids: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    rng: &mut impl Rng,
) -> Array2<f64> {
    let (n_centroids, n_features) = blob_centroids.dim();
    let mut dataset: Array2<f64> = Array2::zeros((n_centroids * blob_size, n_features));

    for (k, centroid) in blob_centroids.genrows().into_iter().enumerate() {
        // Row window of the output reserved for the k-th blob.
        let first_row = k * blob_size;
        let last_row = first_row + blob_size;

        let sampled = generate_blob(blob_size, &centroid, rng);
        dataset
            .slice_mut(s![first_row..last_row, ..])
            .assign(&sampled);
    }

    dataset
}
/// Samples a single "blob": `blob_size` points scattered around `blob_centroid`.
///
/// The points are drawn from a standard normal distribution and then shifted by
/// `blob_centroid`, so the blob is centered on the centroid with unit variance.
/// The returned array has shape `(blob_size, blob_centroid.len())`, one point per
/// row.
///
/// Useful for quickly assembling a synthetic, stereotypical cluster.
pub fn generate_blob(
    blob_size: usize,
    blob_centroid: &ArrayBase<impl Data<Elem = f64>, Ix1>,
    rng: &mut impl Rng,
) -> Array2<f64> {
    let n_features = blob_centroid.len();
    let mut blob: Array2<f64> =
        Array2::random_using((blob_size, n_features), StandardNormal, rng);

    // The centroid is broadcast across the rows, moving every sample from the
    // origin to the requested center.
    blob += blob_centroid;
    blob
}