chrono_probe/input/
mod.rs

1//! This module provides tools for generating input data helpful for testing algorithms.
2//!
3//! The [`Input`] trait is the core of this module and must be implemented by the input types
4//! used by algorithms. This trait defines two methods:
5//!
6//! * `get_size(&self) -> usize`: returns the size of the input.
7//! * `generate_input(size: usize) -> Self`: generates a random input of the given size.
8//!
9//! This module also provides the [`InputBuilder`] struct, which is used to build your input
10//! and store it into an [`InputSet`] instance. You can use the InputBuilder as soon as you
11//! have:
12//!
13//! * Figured out which distribution suits your needs (read the [distribution] documentation
14//! for more infos).
15//! * Created your input type (read the example below).
16//!
17//! # Example
18//!
19//! ## Basic usage
20//!
21//! Let's say we are testing the performance of our new algorithm to check if a given
22//! number is prime. We'll start by defining a new type to represent our input type:
23//!
24//! ```
25//! pub struct PrimeTestInput {
26//!     pub number: u32,
27//! }
28//! ```
29//!
30//! Next, we need to implement the [`Input`] trait for our new type:
31//!
32//! ```
33//! # use rand::Rng;
34//! # use chrono_probe::input::Input;
35//!
36//! # pub struct PrimeTestInput {
37//! #    pub number: u32,
38//! # }
39//!
40//! impl Input for PrimeTestInput {
41//!     type Builder = ();
42//!
43//!     // Return the size of the input.
44//!     fn get_size(&self) -> usize {
45//!         // We use the number of bits as size.
46//!         self.number.to_be_bytes().len() * 8
47//!     }
48//!
49//!     // Generate a random input of the given size.
50//!     fn generate_input(size: usize, builder: &Self::Builder) -> Self {
51//!         let mut rng = rand::thread_rng();
52//!         PrimeTestInput {
53//!             // We consider the size as the number of bits.
54//!             number: rng.gen_range(2u32.pow((size-1) as u32)..2u32.pow(size as u32)),
55//!         }
56//!     }
57//! }
58//! ```
59//!
60//! Now we can use our `PrimeTestInput` type to generate inputs for testing our algorithm!
61//!
62//! Note that the input size is taken as an argument by the `generate_input` method. If
63//! you want to know more about the input sizes generation, you can read the documentation
64//! of the [`distribution`] submodule.
65//!
66//! ## Multiple input generators
67//!
68//! Now, you may be curious about the [`Builder`](Input::Builder) type.
69//!
70//! The [`Builder`](Input::Builder) type is helpful when you want to choose between different
71//! input generators. In the previous example, we only have needed one generator, so we used ()
72//! as the [`Builder`](Input::Builder) type. However, if we had more than one generator, we
73//! could define an enum like this:
74//!
75//! ```
76//! pub enum Generator {
77//!     Fast,
78//!     Uniform,
79//! }
80//! ```
81//!
82//! Then, we could use `Generator` as the [`Builder`](Input::Builder) type:
83//!
84//! ```
85//! use chrono_probe::input::Input;
86//!
87//! # pub struct PrimeTestInput {
88//! #    pub number: u32,
89//! # }
90//!
91//! # pub enum Generator {
92//! #    Fast,
93//! #    Uniform,
94//! # }
95//!
96//! # fn generate_order_vector_fast(size: usize, min: u32, max: u32) -> PrimeTestInput { todo!() }
97//! # fn generate_order_vector(size: usize, min: u32, max: u32) -> PrimeTestInput { todo!() }
98//!
99//! impl Input for PrimeTestInput {
100//!     type Builder = Generator;
101//!
102//!     // Return the size of the input.
103//!     fn get_size(&self) -> usize {
104//!         // We use the number of bits as size.
105//!         self.number.to_be_bytes().len() * 8
106//!     }
107//!
108//!     // Generate a random input of the given size.
109//!     fn generate_input(size: usize, builder: &Self::Builder) -> Self {
110//!         match builder {
111//!             Generator::Fast => generate_order_vector_fast(size, u32::MIN, u32::MAX),
112//!             Generator::Uniform => generate_order_vector(size, u32::MIN, u32::MAX),
113//!         }
114//!     }
115//! }
116//! ```
117//!
118//! ### Using primitive types as input
119//!
120//! If you're new to Rust, you may be wondering why we need to create a new type for the input
121//! when we could just use the `u32` type itself. The reason is that only traits you own can be
122//! implemented for primitive types, and this library owns the [`Input`] trait, not your crate
123//! where you're using this library.
124//! If you need to use a primitive type as an input you need to create a new wrapper type, for
125//! more information refer to the [rust guide](https://doc.rust-lang.org/rust-by-example/generics/new_types.html)
126
127use std::fs::File;
128
129use serde::Serialize;
130
131use self::distribution::Distribution;
132
133pub mod distribution;
134
135/// Trait that must be implemented by algorithms' input types.
136pub trait Input {
137    /// The type of the builder. A builder can be used to select the type of generation for the inputs.
138    type Builder;
139    /// Returns the size of the input.
140    fn get_size(&self) -> usize;
141    /// Generates an input of the given size, using the given builder.
142    fn generate_input(size: usize, builder: &Self::Builder) -> Self;
143}
144
145/// Struct that holds the inputs.
146#[derive(Serialize)]
147pub struct InputSet<I: Input> {
148    /// The inputs.
149    /// The inputs are grouped by size.
150    pub inputs: Vec<Vec<I>>,
151}
152
153/// Struct used for building an [`InputSet`].
154#[derive(Serialize)]
155pub struct InputBuilder<I: Input, D: Distribution> {
156    // The distribution that will be used to generate the input lengths.
157    pub(crate) distribution: D,
158    // The builder that will be used to generate the inputs.
159    pub(crate) builder: I::Builder,
160}
161
162impl<I: Input, D: Distribution> InputBuilder<I, D> {
163    /// Creates a new [`InputBuilder`].
164    ///
165    /// # Arguments
166    ///
167    /// * `distribution` - The distribution that will be used to generate the input lengths.
168    /// * `builder` - The builder that will be used to generate the inputs.
169    pub fn new(distribution: D, builder: I::Builder) -> InputBuilder<I, D> {
170        InputBuilder {
171            distribution,
172            builder,
173        }
174    }
175
176    /// Generates the inputs.
177    ///
178    /// # Arguments
179    ///
180    /// * `n` - The number of inputs to be generated.
181    pub fn build(&self, n: usize) -> InputSet<I> {
182        self.build_with_repetitions(n, 1)
183    }
184
185    /// Generates the inputs with repetitions (i.e. multiple inputs with the same size).
186    /// This can be useful in order to obtain a more accurate result.
187    ///
188    /// # Arguments
189    ///
190    /// * `n` - The number of inputs to be generated (excluding repetitions: the actual amount of inputs generated is n*repetitions).
191    /// * `repetitions` - The number of repetitions for each input size.
192    pub fn build_with_repetitions(&self, n: usize, repetitions: usize) -> InputSet<I> {
193        // TODO: remove these assertions: usually asserts are not used in libraries
194        // A better way to handle this would be to return a Result instead of panicking
195        assert!(
196            n > 0,
197            "The number of inputs to be generated must be greater than 0"
198        );
199        assert!(
200            repetitions > 0,
201            "The number of repetitions must be greater than 0"
202        );
203
204        // Initialize the inputs vec with the correct capacity
205        let mut inputs = Vec::with_capacity(n);
206
207        // Generate the input lengths using the given distribution
208        let length_distribution = self.distribution.generate(n);
209
210        // Printing in the console for debug purposes
211        #[cfg(feature = "debug")]
212        println!("Generating inputs...\n");
213
214        // Iterate over the input lengths
215        for (_j, input_size) in length_distribution.iter().enumerate() {
216            // Initialize the vec holding the inputs with the same size
217            let mut inputs_with_same_size = Vec::with_capacity(repetitions);
218
219            // Iterate over the repetitions
220            for _ in 0..repetitions {
221                // Generate the inputs of the given size and push them to the vec
222                inputs_with_same_size.push(I::generate_input(*input_size, &self.builder));
223            }
224
225            // Push the vec holding the inputs with the same size to the inputs vec
226            inputs.push(inputs_with_same_size);
227
228            // Printing in the console the progress for debug purposes
229            #[cfg(feature = "debug")]
230            {
231                if _j % (n / 20) == 0 {
232                    println!("{}%", _j * 100 / n);
233                }
234            }
235        }
236
237        // Return the input set
238        InputSet { inputs }
239    }
240}
241
242impl<I: Input + Serialize> InputSet<I> {
243    /// Serializes the input set in a json file.
244    /// The file will be created if it doesn't exist, otherwise it will be overwritten.
245    ///
246    /// # Arguments
247    ///
248    /// * `filename` - The name of the file to be created.
249    ///
250    /// # Panics
251    ///
252    /// * Panics if the file cannot be created.
253    /// * Panics if the input set cannot be serialized.
254    ///
255    pub fn serialize_json(&self, filename: &str) {
256        let mut file = File::create(filename).unwrap();
257        serde_json::to_writer(&mut file, &self).unwrap();
258        // TODO: handle errors instead of panicking maybe returning a Result
259    }
260}