chrono_probe/input/
distribution.rs

1//! This module implements an easy way to abstract the generation of input sizes.
2//!
3//! Provides:
4//! 1) A trait that can be used to define your own distribution for input sizes.
5//! 2) A trait that can be used to define your a general probability distribution.
6//! 3) A set of predefined distributions.
7//!
8//! # Examples
9//!
10//! To test this module you can easily copy and paste the following code snippets.
11//!
12//! ## Predefined distributions
13//!
14//! This example demonstrates how to use a predefined distribution to generate a vector of input
15//! sizes. In this specific case, we use the [`Uniform`] distribution as an example.
16//!
17//! ```
18//! use chrono_probe::input::distribution::*;
19//!
20//! // First, we create an instance of the Uniform distribution
21//! let uniform = Uniform::new(1..=100);
22//!
23//! // Then we generate a vector of 10 input sizes using the distribution
24//! let lengths = uniform.generate(10);
25//!
26//! // Finally, we print the vector of input sizes
27//! println!("{:?}", lengths);
28//! ```
29//!
30//! ## Custom distribution
31//!
32//! In this example we will cover the steps needed to create a custom distribution.
33//! The goal is to generate a vector of input sizes that are all equal to a given constant.
34//! To achieve this goal, we will take two different approaches:
35//!
36//! * Implement the [`Distribution`] trait directly
37//! * Implement the [`ProbabilityDistribution`] trait
38//!
39//! ### Implementing the [`Distribution`] trait
40//!
41//! * Create a struct representing the custom distribution
42//! * Implement a way of creating an instance of the distribution
43//! * Implement the [`Debug`] trait to allow printing the name of your distribution in the plots
44//! * Implement the [`Distribution`] trait, which specifies how to generate the input sizes
45//!
46//! ```
47//! use std::fmt::Debug;
48//!
49//! use chrono_probe::input::distribution::*;
50//!
51//! // First, we create the struct representing the custom distribution
52//! struct Constant {
53//!     k: usize,
54//! }
55//!
56//! // Then we implement a way of creating an instance of the distribution
57//! impl Constant {
58//!     pub fn new(k: usize) -> Self { Self { k } }
59//! }
60//!
61//! // By implementing the Display trait, we can print the name of our distribution in the plots
62//! impl Debug for Constant {
63//!     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64//!        write!(f, "Costant")
65//!     }
66//! }
67//!
68//! // Lastly, we implement the Distribution trait, which specifies how to generate the input sizes
69//! impl Distribution for Constant {
70//!     fn generate(&self, n: usize) -> Vec<usize> {
71//!         let mut lengths = Vec::with_capacity(n);
72//!         for _ in 0..n {
73//!             lengths.push(self.k);
74//!         }
75//!         lengths
76//!     }
77//! }
78//!
79//! let constant = Constant::new(5);
80//! let lengths = constant.generate(10);
81//! println!("{:?}", lengths);
82//! ```
83//!
84//! ### Implementing the [`ProbabilityDistribution`] trait
85//!
86//! Implementing the [`ProbabilityDistribution`] trait is very similar to implementing the
87//! [`Distribution`] trait. The only difference is that this time we only have to implement a way to
88//! generate a single input size form an uniform distribution, and the trait will take care of
89//! implementing the [`Distribution`] trait for us.
90//!
91//! ```
92//! // Same as before
93//!
94//! use std::fmt::Debug;
95//! use chrono_probe::input::distribution::*;
96//!
97//! struct Constant {
98//!     k: usize,
99//! }
100//!
101//! impl Constant {
102//!     pub fn new(k: usize) -> Self { Self { k } }
103//! }
104//!
105//! impl Debug for Constant {
106//!     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107//!        write!(f, "Costant")
108//!     }
109//! }
110//!
111//! // This time we implement the ProbabilityDistribution trait
112//! impl ProbabilityDistribution for Constant {
113//!     fn inverse_cdf(&self, _u: f64) -> f64 {
114//!        self.k as f64
115//!     }
116//! }
117//! ```
118//!
119//! For more information on how to implement a the [`ProbabilityDistribution`] trait, see the
120//! documentation of the trait itself.
121//!
122//! Note that this example is deliberately simple. In practice, you may want to generate
123//! input sizes that are more diverse than a constant value. Nevertheless, this example
124//! provides a basic understanding of how to create a custom distribution and can be used
125//! as a starting point for implementing more complex distributions tailored to your needs.
126//!
127//! ### Which approach should I use?
128//!
129//! The approach you should use depends on the complexity of the distribution you want to implement.
130//! If you want to implement a simple distribution, with a simple inverse cumulative distribution,
131//! you can use the [`ProbabilityDistribution`] trait. If you want to implement a more complex
132//! distribution, you should implement the [`Distribution`] trait directly.
133
134use std::fmt::Debug;
135use std::ops::RangeInclusive;
136
137use rand::{Rng, thread_rng};
138
139// =====================
140// = THE MODULE ITSELF =
141// =====================
142
143/// This trait defines a Distribution in an abstract way.
144///
145/// Without implementing lower level mechanisms this trait defines the shared behaviour of a
146/// distribution, i.e. the property of being able to generate the input sizes.
147pub trait Distribution: Debug {
148    /// Generates a vector of input sizes. The number of input sizes to generate is given as
149    /// argument.
150    fn generate(&self, n: usize) -> Vec<usize>;
151}
152
153/// This enum defines the possible generation types.
154#[derive(Debug, Clone)]
155pub enum GenerationType {
156    /// Generates input in fixed intervals.
157    FixedIntervals,
158    /// Generates input in random intervals.
159    Random,
160}
161
162// ==============================
163// = PREDEFINED IMPLEMENTATIONS =
164// ==============================
165
166/// This trait defines a certain probability distribution. It is used to generate input sizes
167/// according to the distribution.
168///
169/// If a type implements this trait, and the Debug trait, it also implements the Distribution trait.
170/// In this way, the user can easily create a probability distribution and use it to generate input
171/// sizes, without having to implement the Distribution trait.
172///
173pub trait ProbabilityDistribution {
174    /// This function takes a value x in \[0,1] uniformly distributed and returns a value that is
175    /// distributed according to the probability distribution chosen.
176    ///
177    /// This can be done with the [inverse transform sampling method](https://en.wikipedia.org/wiki/Inverse_transform_sampling):
178    /// given a random variable X with cumulative distribution function F, then F<sup>-1</sup>
179    /// <sub>X</sub>(U) follows the same distribution of X, where U is uniformly distributed in \[0,1].
180    fn inverse_cdf(&self, u: f64) -> f64;
181
182    /// Returns the generation type of the distribution.
183    ///
184    /// This is used to determine whether the input sizes should be generated in fixed intervals or
185    /// in random intervals. By default, it returns [`GenerationType::Random`] but it can be
186    /// overridden to return the desired generation type.
187    fn get_gen_type(&self) -> &GenerationType {
188        &GenerationType::Random
189    }
190}
191
192impl<T: ProbabilityDistribution + Debug> Distribution for T {
193    fn generate(&self, n: usize) -> Vec<usize> {
194        assert!(n > 0, "The number of input sizes must be greater than zero");
195        // Preallocating the vector of input sizes
196        let mut lengths = Vec::with_capacity(n);
197
198        for i in 0..n {
199            let u: f64 = match self.get_gen_type() {
200                GenerationType::FixedIntervals => {
201                    if n != 1 {
202                        i as f64 / (n - 1) as f64
203                    } else {
204                        0.0
205                    }
206                }
207                GenerationType::Random => thread_rng().gen::<f64>(),
208            };
209
210            let x = self.inverse_cdf(u);
211
212            lengths.push(x as usize);
213        }
214        lengths
215    }
216}
217
218/// The struct representing an uniform distribution.
219///
220/// Given a range, it generates a vector of uniform distributed input sizes.
221#[derive(Clone)]
222pub struct Uniform {
223    range: RangeInclusive<usize>,
224    gen_type: GenerationType,
225}
226
227impl Uniform {
228    /// Creates a new uniform distribution.
229    ///
230    /// # Arguments
231    ///
232    /// * `range` - The range of the distribution.
233    pub fn new(range: RangeInclusive<usize>) -> Self {
234        assert!(!range.is_empty(), "The range must not be empty.");
235        Uniform {
236            range,
237            gen_type: GenerationType::FixedIntervals,
238        }
239    }
240
241    /// Sets the generation type of the exponential distribution.
242    /// The generation type can be either fixed intervals or random.
243    ///
244    /// # Arguments
245    ///
246    /// * `gen_type` - The generation type.
247    pub fn set_gen_type(&mut self, gen_type: GenerationType) {
248        self.gen_type = gen_type;
249    }
250}
251
252impl Debug for Uniform {
253    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254        write!(f, "Uniform, generation type: {:?}", self.gen_type)
255    }
256}
257
258impl ProbabilityDistribution for Uniform {
259    fn inverse_cdf(&self, u: f64) -> f64 {
260        let a = *self.range.start() as f64;
261        let b = (self.range.end() - self.range.start()) as f64;
262        a + b * u
263    }
264
265    fn get_gen_type(&self) -> &GenerationType {
266        &self.gen_type
267    }
268}
269
270/// The struct representing an exponential distribution.
271///
272/// Given a range, it generates a vector of input sizes using an exponential distribution.
273#[derive(Clone)]
274pub struct Exponential {
275    range: RangeInclusive<usize>,
276    lambda: f64,
277    gen_type: GenerationType,
278}
279
280impl Exponential {
281    /// Creates a new exponential distribution.
282    /// The mean of the distribution is set to match the mean of the reciprocal distribution.
283    ///
284    /// # Arguments
285    ///
286    /// * `range` - The range of the distribution.
287    pub fn new(range: RangeInclusive<usize>) -> Self {
288        assert!(!range.is_empty(), "The range must not be empty.");
289        let lambda =
290            ((range.end() / range.start()) as f64).ln() / ((range.end() - range.start()) as f64);
291        let gen_type = GenerationType::FixedIntervals;
292        Exponential {
293            range,
294            lambda,
295            gen_type,
296        }
297    }
298
299    /// Sets the &lambda; of the exponential distribution.
300    ///
301    /// # Arguments
302    ///
303    /// * `lambda` - The new &lambda; of the exponential distribution.
304    pub fn set_lambda(&mut self, lambda: f64) {
305        assert!(lambda > 0.0, "Lambda must be grater then zero");
306        self.lambda = lambda;
307    }
308
309    /// Sets the generation type of the exponential distribution.
310    /// The generation type can be either fixed intervals or random.
311    ///
312    /// # Arguments
313    ///
314    /// * `gen_type` - The new generation type of the exponential distribution.
315    pub fn set_gen_type(&mut self, gen_type: GenerationType) {
316        self.gen_type = gen_type;
317    }
318}
319
320impl Debug for Exponential {
321    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
322        write!(
323            f,
324            "Exponential λ={}, generation type: {:?}",
325            self.lambda, self.gen_type
326        )
327    }
328}
329
330impl ProbabilityDistribution for Exponential {
331    fn inverse_cdf(&self, u: f64) -> f64 {
332        /*
333        In order to generate an exponential distribution the inverse transform sampling method is
334        used. Given an uniform distributed value u ∈ [0, 1] a linear transformation is applied in
335        order to get e ∈ [F(min), F(max)] where F(x) is the cumulative distribution function of the
336        exponential distribution. The inverse of F(x) is then applied to e in order to get the
337        desired value:
338
339        F^-1(x) = -ln(1 - x) / lambda
340
341        Desired value = F^-1(e)
342        */
343
344        let min = *self.range.start() as f64;
345        let max = *self.range.end() as f64;
346        let lambda = self.lambda;
347
348        let x = lambda * min;
349        let y = lambda * max;
350        let z: f64;
351
352        if u == 1.0_f64 {
353            return max;
354        } else if u == 0.0 {
355            return min;
356        }
357
358        /*
359        If the difference between y and x is small enough, we can use the exact formula to compute the
360        desired value.
361         */
362        if y - x < f64::MAX_EXP as f64 * 2.0_f64.ln() {
363            z = y - ((1.0 - u) * (y - x).exp() + u).ln();
364            return z / lambda;
365        }
366
367        /*
368        If the difference between y and x is too big, we use the approximation formula to compute the
369        desired value.
370         */
371        if y - x > (1.0 / (1.0 - u)).ln() {
372            z = x - (1.0 - u).ln();
373        } else {
374            z = y - u.ln();
375        }
376
377        z / lambda
378    }
379
380    fn get_gen_type(&self) -> &GenerationType {
381        &self.gen_type
382    }
383}
384
385/// The struct representing a uniform distribution.
386///
387/// Given a range, it generates a vector of input sizes using a uniform distribution.
388pub struct Reciprocal {
389    range: RangeInclusive<usize>,
390    gen_type: GenerationType,
391}
392
393impl Reciprocal {
394    /// Creates a new uniform distribution.
395    ///
396    /// # Arguments
397    ///
398    /// * `range` - The range of the distribution.
399    pub fn new(range: RangeInclusive<usize>) -> Self {
400        assert!(!range.is_empty(), "The range must not be empty.");
401        Reciprocal {
402            range,
403            gen_type: GenerationType::FixedIntervals,
404        }
405    }
406
407    /// Sets the generation type of the reciprocal distribution.
408    /// The generation type can be either fixed intervals or random.
409    ///
410    /// # Arguments
411    ///
412    /// * `gen_type` - The new generation type of the exponential distribution.
413    pub fn set_gen_type(&mut self, gen_type: GenerationType) {
414        self.gen_type = gen_type;
415    }
416}
417
418impl Debug for Reciprocal {
419    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
420        write!(f, "Reciprocal, generation type: {:?}", self.gen_type)
421    }
422}
423
424impl ProbabilityDistribution for Reciprocal {
425    fn inverse_cdf(&self, u: f64) -> f64 {
426        (*self.range.end() as f64 / *self.range.start() as f64).powf(u) * *self.range.start() as f64
427    }
428
429    fn get_gen_type(&self) -> &GenerationType {
430        &self.gen_type
431    }
432}