symagen/
random_data.rs

1//! Generate random data for use in benchmarks and tests.
2
3use distances::number::Number;
4use rand::prelude::*;
5
/// π, the ratio of a circle's circumference to its diameter, as an `f64`.
pub const PI: f64 = core::f64::consts::PI;
8
9/// Generate a randomized tabular dataset for use in benchmarks and tests with a
10/// given seed.
11///
12/// This uses the `rand` crate's `StdRng` as the random number generator.
13///
14/// # Arguments:
15///
16/// * `cardinality`: number of points to generate.
17/// * `dimensionality`: dimensionality of points to generate.
18/// * `min_val`: of each axis in the hypercube.
19/// * `max_val`: of each axis in the hypercube.
20/// * `seed`: for the random number generator.
21#[must_use]
22pub fn random_tabular_seedable<T: Number>(
23    cardinality: usize,
24    dimensionality: usize,
25    min_val: T,
26    max_val: T,
27    seed: u64,
28) -> Vec<Vec<T>> {
29    random_tabular(
30        cardinality,
31        dimensionality,
32        min_val,
33        max_val,
34        &mut rand::rngs::StdRng::seed_from_u64(seed),
35    )
36}
37
38/// Generate a randomized tabular dataset for use in benchmarks and tests.
39///
40/// # Arguments:
41///
42/// * `cardinality`: number of points to generate.
43/// * `dimensionality`: dimensionality of points to generate.
44/// * `min_val`: of each axis in the hypercube.
45/// * `max_val`: of each axis in the hypercube.
46/// * `rng`: random number generator.
47#[must_use]
48pub fn random_tabular<T: Number, R: Rng>(
49    cardinality: usize,
50    dimensionality: usize,
51    min_val: T,
52    max_val: T,
53    rng: &mut R,
54) -> Vec<Vec<T>> {
55    let diff = max_val - min_val;
56    (0..cardinality)
57        .map(|_| {
58            (0..dimensionality)
59                .map(|_| min_val + T::next_random(rng) % diff)
60                .collect()
61        })
62        .collect()
63}
64
65/// Generate a randomized dataset of string sequences.
66///
67/// # Arguments:
68///
69/// * `cardinality`: number of strings to generate.
70/// * `min_len`: minimum length of any string
71/// * `max_len`: maximum length of any string
72/// * `alphabet`: the alphabet from which to draw characters
73/// * `seed`: for the random number generator
74#[must_use]
75pub fn random_string(cardinality: usize, min_len: usize, max_len: usize, alphabet: &str, seed: u64) -> Vec<String> {
76    let alphabet = alphabet.chars().collect::<Vec<_>>();
77    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
78    (0..cardinality)
79        .map(|_| {
80            let len = rng.gen_range(min_len..=max_len);
81            (0..len)
82                .map(|_| alphabet[rng.gen_range(0..alphabet.len())])
83                .collect::<String>()
84        })
85        .collect()
86}
87
88/// Generate a single point (in Cartesian coordinates) from a uniform distribution
89/// inside an n-dimensional ball of given radius.
90///
91/// This function produces points in a uniform distribution inside the n-ball, and does
92/// so in linear time in the dimensionality of the ball. The algorithm is based on the
93/// method described in [this wikipedia article](https://en.wikipedia.org/wiki/N-sphere#Spherical_coordinates).
94///
95/// # Arguments:
96///
97/// * `dim`: dimensionality of the point to generate
98/// * `radius`: radius of the ball
99/// * `rng`: random number generator
100///
101/// # Returns:
102///
103/// * `Vec<T>`: the generated point
104pub fn n_ball<R: Rng>(dim: usize, radius: f64, rng: &mut R) -> Vec<f64> {
105    // sample random angles from 0 to 2π for the last angle and from 0 to π for the other angles.
106    let angles = {
107        let mut angles = (0..dim).map(|_| f64::next_random(rng) * PI).collect::<Vec<_>>();
108        angles[dim - 1] *= 2.;
109        angles
110    };
111
112    // The `scan` method is used to accumulate the product of the sine of the angles.
113    let sine_products = angles.iter().scan(1., |sine_product, &x| {
114        // each iteration, we'll multiply the state by the element ...
115        *sine_product *= f64::sin(x);
116        Some(*sine_product)
117    });
118
119    let cosines = angles.iter().map(|&x| f64::cos(x));
120
121    // sample a random radius value from 0 to the given radius
122    let r = radius * f64::next_random(rng);
123
124    sine_products
125        .zip(cosines)
126        .map(|(sine_product, cosine)| r * sine_product * cosine)
127        .collect()
128}