chrono_probe/input/distribution.rs
1//! This module implements an easy way to abstract the generation of input sizes.
2//!
3//! Provides:
4//! 1) A trait that can be used to define your own distribution for input sizes.
5//! 2) A trait that can be used to define your a general probability distribution.
6//! 3) A set of predefined distributions.
7//!
8//! # Examples
9//!
10//! To test this module you can easily copy and paste the following code snippets.
11//!
12//! ## Predefined distributions
13//!
14//! This example demonstrates how to use a predefined distribution to generate a vector of input
15//! sizes. In this specific case, we use the [`Uniform`] distribution as an example.
16//!
17//! ```
18//! use chrono_probe::input::distribution::*;
19//!
20//! // First, we create an instance of the Uniform distribution
21//! let uniform = Uniform::new(1..=100);
22//!
23//! // Then we generate a vector of 10 input sizes using the distribution
24//! let lengths = uniform.generate(10);
25//!
26//! // Finally, we print the vector of input sizes
27//! println!("{:?}", lengths);
28//! ```
29//!
30//! ## Custom distribution
31//!
32//! In this example we will cover the steps needed to create a custom distribution.
33//! The goal is to generate a vector of input sizes that are all equal to a given constant.
34//! To achieve this goal, we will take two different approaches:
35//!
36//! * Implement the [`Distribution`] trait directly
37//! * Implement the [`ProbabilityDistribution`] trait
38//!
39//! ### Implementing the [`Distribution`] trait
40//!
41//! * Create a struct representing the custom distribution
42//! * Implement a way of creating an instance of the distribution
43//! * Implement the [`Debug`] trait to allow printing the name of your distribution in the plots
44//! * Implement the [`Distribution`] trait, which specifies how to generate the input sizes
45//!
46//! ```
47//! use std::fmt::Debug;
48//!
49//! use chrono_probe::input::distribution::*;
50//!
51//! // First, we create the struct representing the custom distribution
52//! struct Constant {
53//! k: usize,
54//! }
55//!
56//! // Then we implement a way of creating an instance of the distribution
57//! impl Constant {
58//! pub fn new(k: usize) -> Self { Self { k } }
59//! }
60//!
61//! // By implementing the Display trait, we can print the name of our distribution in the plots
62//! impl Debug for Constant {
63//! fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64//! write!(f, "Costant")
65//! }
66//! }
67//!
68//! // Lastly, we implement the Distribution trait, which specifies how to generate the input sizes
69//! impl Distribution for Constant {
70//! fn generate(&self, n: usize) -> Vec<usize> {
71//! let mut lengths = Vec::with_capacity(n);
72//! for _ in 0..n {
73//! lengths.push(self.k);
74//! }
75//! lengths
76//! }
77//! }
78//!
79//! let constant = Constant::new(5);
80//! let lengths = constant.generate(10);
81//! println!("{:?}", lengths);
82//! ```
83//!
84//! ### Implementing the [`ProbabilityDistribution`] trait
85//!
86//! Implementing the [`ProbabilityDistribution`] trait is very similar to implementing the
87//! [`Distribution`] trait. The only difference is that this time we only have to implement a way to
88//! generate a single input size form an uniform distribution, and the trait will take care of
89//! implementing the [`Distribution`] trait for us.
90//!
91//! ```
92//! // Same as before
93//!
94//! use std::fmt::Debug;
95//! use chrono_probe::input::distribution::*;
96//!
97//! struct Constant {
98//! k: usize,
99//! }
100//!
101//! impl Constant {
102//! pub fn new(k: usize) -> Self { Self { k } }
103//! }
104//!
105//! impl Debug for Constant {
106//! fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107//! write!(f, "Costant")
108//! }
109//! }
110//!
111//! // This time we implement the ProbabilityDistribution trait
112//! impl ProbabilityDistribution for Constant {
113//! fn inverse_cdf(&self, _u: f64) -> f64 {
114//! self.k as f64
115//! }
116//! }
117//! ```
118//!
119//! For more information on how to implement a the [`ProbabilityDistribution`] trait, see the
120//! documentation of the trait itself.
121//!
122//! Note that this example is deliberately simple. In practice, you may want to generate
123//! input sizes that are more diverse than a constant value. Nevertheless, this example
124//! provides a basic understanding of how to create a custom distribution and can be used
125//! as a starting point for implementing more complex distributions tailored to your needs.
126//!
127//! ### Which approach should I use?
128//!
129//! The approach you should use depends on the complexity of the distribution you want to implement.
130//! If you want to implement a simple distribution, with a simple inverse cumulative distribution,
131//! you can use the [`ProbabilityDistribution`] trait. If you want to implement a more complex
132//! distribution, you should implement the [`Distribution`] trait directly.
133
134use std::fmt::Debug;
135use std::ops::RangeInclusive;
136
137use rand::{Rng, thread_rng};
138
139// =====================
140// = THE MODULE ITSELF =
141// =====================
142
143/// This trait defines a Distribution in an abstract way.
144///
145/// Without implementing lower level mechanisms this trait defines the shared behaviour of a
146/// distribution, i.e. the property of being able to generate the input sizes.
147pub trait Distribution: Debug {
148 /// Generates a vector of input sizes. The number of input sizes to generate is given as
149 /// argument.
150 fn generate(&self, n: usize) -> Vec<usize>;
151}
152
153/// This enum defines the possible generation types.
154#[derive(Debug, Clone)]
155pub enum GenerationType {
156 /// Generates input in fixed intervals.
157 FixedIntervals,
158 /// Generates input in random intervals.
159 Random,
160}
161
162// ==============================
163// = PREDEFINED IMPLEMENTATIONS =
164// ==============================
165
166/// This trait defines a certain probability distribution. It is used to generate input sizes
167/// according to the distribution.
168///
169/// If a type implements this trait, and the Debug trait, it also implements the Distribution trait.
170/// In this way, the user can easily create a probability distribution and use it to generate input
171/// sizes, without having to implement the Distribution trait.
172///
173pub trait ProbabilityDistribution {
174 /// This function takes a value x in \[0,1] uniformly distributed and returns a value that is
175 /// distributed according to the probability distribution chosen.
176 ///
177 /// This can be done with the [inverse transform sampling method](https://en.wikipedia.org/wiki/Inverse_transform_sampling):
178 /// given a random variable X with cumulative distribution function F, then F<sup>-1</sup>
179 /// <sub>X</sub>(U) follows the same distribution of X, where U is uniformly distributed in \[0,1].
180 fn inverse_cdf(&self, u: f64) -> f64;
181
182 /// Returns the generation type of the distribution.
183 ///
184 /// This is used to determine whether the input sizes should be generated in fixed intervals or
185 /// in random intervals. By default, it returns [`GenerationType::Random`] but it can be
186 /// overridden to return the desired generation type.
187 fn get_gen_type(&self) -> &GenerationType {
188 &GenerationType::Random
189 }
190}
191
192impl<T: ProbabilityDistribution + Debug> Distribution for T {
193 fn generate(&self, n: usize) -> Vec<usize> {
194 assert!(n > 0, "The number of input sizes must be greater than zero");
195 // Preallocating the vector of input sizes
196 let mut lengths = Vec::with_capacity(n);
197
198 for i in 0..n {
199 let u: f64 = match self.get_gen_type() {
200 GenerationType::FixedIntervals => {
201 if n != 1 {
202 i as f64 / (n - 1) as f64
203 } else {
204 0.0
205 }
206 }
207 GenerationType::Random => thread_rng().gen::<f64>(),
208 };
209
210 let x = self.inverse_cdf(u);
211
212 lengths.push(x as usize);
213 }
214 lengths
215 }
216}
217
218/// The struct representing an uniform distribution.
219///
220/// Given a range, it generates a vector of uniform distributed input sizes.
221#[derive(Clone)]
222pub struct Uniform {
223 range: RangeInclusive<usize>,
224 gen_type: GenerationType,
225}
226
227impl Uniform {
228 /// Creates a new uniform distribution.
229 ///
230 /// # Arguments
231 ///
232 /// * `range` - The range of the distribution.
233 pub fn new(range: RangeInclusive<usize>) -> Self {
234 assert!(!range.is_empty(), "The range must not be empty.");
235 Uniform {
236 range,
237 gen_type: GenerationType::FixedIntervals,
238 }
239 }
240
241 /// Sets the generation type of the exponential distribution.
242 /// The generation type can be either fixed intervals or random.
243 ///
244 /// # Arguments
245 ///
246 /// * `gen_type` - The generation type.
247 pub fn set_gen_type(&mut self, gen_type: GenerationType) {
248 self.gen_type = gen_type;
249 }
250}
251
252impl Debug for Uniform {
253 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254 write!(f, "Uniform, generation type: {:?}", self.gen_type)
255 }
256}
257
258impl ProbabilityDistribution for Uniform {
259 fn inverse_cdf(&self, u: f64) -> f64 {
260 let a = *self.range.start() as f64;
261 let b = (self.range.end() - self.range.start()) as f64;
262 a + b * u
263 }
264
265 fn get_gen_type(&self) -> &GenerationType {
266 &self.gen_type
267 }
268}
269
270/// The struct representing an exponential distribution.
271///
272/// Given a range, it generates a vector of input sizes using an exponential distribution.
273#[derive(Clone)]
274pub struct Exponential {
275 range: RangeInclusive<usize>,
276 lambda: f64,
277 gen_type: GenerationType,
278}
279
280impl Exponential {
281 /// Creates a new exponential distribution.
282 /// The mean of the distribution is set to match the mean of the reciprocal distribution.
283 ///
284 /// # Arguments
285 ///
286 /// * `range` - The range of the distribution.
287 pub fn new(range: RangeInclusive<usize>) -> Self {
288 assert!(!range.is_empty(), "The range must not be empty.");
289 let lambda =
290 ((range.end() / range.start()) as f64).ln() / ((range.end() - range.start()) as f64);
291 let gen_type = GenerationType::FixedIntervals;
292 Exponential {
293 range,
294 lambda,
295 gen_type,
296 }
297 }
298
299 /// Sets the λ of the exponential distribution.
300 ///
301 /// # Arguments
302 ///
303 /// * `lambda` - The new λ of the exponential distribution.
304 pub fn set_lambda(&mut self, lambda: f64) {
305 assert!(lambda > 0.0, "Lambda must be grater then zero");
306 self.lambda = lambda;
307 }
308
309 /// Sets the generation type of the exponential distribution.
310 /// The generation type can be either fixed intervals or random.
311 ///
312 /// # Arguments
313 ///
314 /// * `gen_type` - The new generation type of the exponential distribution.
315 pub fn set_gen_type(&mut self, gen_type: GenerationType) {
316 self.gen_type = gen_type;
317 }
318}
319
320impl Debug for Exponential {
321 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
322 write!(
323 f,
324 "Exponential λ={}, generation type: {:?}",
325 self.lambda, self.gen_type
326 )
327 }
328}
329
330impl ProbabilityDistribution for Exponential {
331 fn inverse_cdf(&self, u: f64) -> f64 {
332 /*
333 In order to generate an exponential distribution the inverse transform sampling method is
334 used. Given an uniform distributed value u ∈ [0, 1] a linear transformation is applied in
335 order to get e ∈ [F(min), F(max)] where F(x) is the cumulative distribution function of the
336 exponential distribution. The inverse of F(x) is then applied to e in order to get the
337 desired value:
338
339 F^-1(x) = -ln(1 - x) / lambda
340
341 Desired value = F^-1(e)
342 */
343
344 let min = *self.range.start() as f64;
345 let max = *self.range.end() as f64;
346 let lambda = self.lambda;
347
348 let x = lambda * min;
349 let y = lambda * max;
350 let z: f64;
351
352 if u == 1.0_f64 {
353 return max;
354 } else if u == 0.0 {
355 return min;
356 }
357
358 /*
359 If the difference between y and x is small enough, we can use the exact formula to compute the
360 desired value.
361 */
362 if y - x < f64::MAX_EXP as f64 * 2.0_f64.ln() {
363 z = y - ((1.0 - u) * (y - x).exp() + u).ln();
364 return z / lambda;
365 }
366
367 /*
368 If the difference between y and x is too big, we use the approximation formula to compute the
369 desired value.
370 */
371 if y - x > (1.0 / (1.0 - u)).ln() {
372 z = x - (1.0 - u).ln();
373 } else {
374 z = y - u.ln();
375 }
376
377 z / lambda
378 }
379
380 fn get_gen_type(&self) -> &GenerationType {
381 &self.gen_type
382 }
383}
384
385/// The struct representing a uniform distribution.
386///
387/// Given a range, it generates a vector of input sizes using a uniform distribution.
388pub struct Reciprocal {
389 range: RangeInclusive<usize>,
390 gen_type: GenerationType,
391}
392
393impl Reciprocal {
394 /// Creates a new uniform distribution.
395 ///
396 /// # Arguments
397 ///
398 /// * `range` - The range of the distribution.
399 pub fn new(range: RangeInclusive<usize>) -> Self {
400 assert!(!range.is_empty(), "The range must not be empty.");
401 Reciprocal {
402 range,
403 gen_type: GenerationType::FixedIntervals,
404 }
405 }
406
407 /// Sets the generation type of the reciprocal distribution.
408 /// The generation type can be either fixed intervals or random.
409 ///
410 /// # Arguments
411 ///
412 /// * `gen_type` - The new generation type of the exponential distribution.
413 pub fn set_gen_type(&mut self, gen_type: GenerationType) {
414 self.gen_type = gen_type;
415 }
416}
417
418impl Debug for Reciprocal {
419 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
420 write!(f, "Reciprocal, generation type: {:?}", self.gen_type)
421 }
422}
423
424impl ProbabilityDistribution for Reciprocal {
425 fn inverse_cdf(&self, u: f64) -> f64 {
426 (*self.range.end() as f64 / *self.range.start() as f64).powf(u) * *self.range.start() as f64
427 }
428
429 fn get_gen_type(&self) -> &GenerationType {
430 &self.gen_type
431 }
432}