// cubecl_common/benchmark.rs

1use alloc::format;
2use alloc::string::String;
3use alloc::vec;
4use alloc::vec::Vec;
5use core::fmt::Display;
6use core::time::Duration;
7
8pub use crate::profile::{Instant, TimingMethod};
9
10#[cfg(feature = "std")]
11pub use crate::profile::ProfileDuration;
12
/// Results of a benchmark run.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(new, Debug, Clone)]
pub struct BenchmarkDurations {
    /// How these durations were measured (system wall-clock or device timing).
    pub timing_method: TimingMethod,
    /// All durations of the run, in the order they were benchmarked.
    pub durations: Vec<Duration>,
}
22
23impl BenchmarkDurations {
24    /// Construct from a list of durations.
25    pub fn from_durations(timing_method: TimingMethod, durations: Vec<Duration>) -> Self {
26        Self {
27            timing_method,
28            durations,
29        }
30    }
31
32    /// Returns a tuple of durations: (min, max, median)
33    fn min_max_median_durations(&self) -> (Duration, Duration, Duration) {
34        let mut sorted = self.durations.clone();
35        sorted.sort();
36        let min = *sorted.first().unwrap();
37        let max = *sorted.last().unwrap();
38        let median = *sorted.get(sorted.len() / 2).unwrap();
39        (min, max, median)
40    }
41
42    /// Returns the median duration among all durations
43    pub(crate) fn mean_duration(&self) -> Duration {
44        self.durations.iter().sum::<Duration>() / self.durations.len() as u32
45    }
46
47    /// Returns the variance durations for the durations
48    pub(crate) fn variance_duration(&self, mean: Duration) -> Duration {
49        self.durations
50            .iter()
51            .map(|duration| {
52                let tmp = duration.as_secs_f64() - mean.as_secs_f64();
53                Duration::from_secs_f64(tmp * tmp)
54            })
55            .sum::<Duration>()
56            / self.durations.len() as u32
57    }
58}
59
60impl Display for BenchmarkDurations {
61    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62        let computed = BenchmarkComputations::new(self);
63        let BenchmarkComputations {
64            mean,
65            median,
66            variance,
67            min,
68            max,
69        } = computed;
70        let num_sample = self.durations.len();
71        let timing_method = self.timing_method;
72
73        f.write_str(
74            format!(
75                "
76―――――――― Result ―――――――――
77  Timing      {timing_method}
78  Samples     {num_sample}
79  Mean        {mean:.3?}
80  Variance    {variance:.3?}
81  Median      {median:.3?}
82  Min         {min:.3?}
83  Max         {max:.3?}
84―――――――――――――――――――――――――"
85            )
86            .as_str(),
87        )
88    }
89}
90
/// Computed values from benchmark durations.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
// `PartialEq`/`Eq` were previously only derived behind the `serde` feature,
// which made equality availability depend on an unrelated feature flag.
// Deriving them unconditionally is strictly backward-compatible.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct BenchmarkComputations {
    /// Mean of all the durations.
    pub mean: Duration,
    /// Median of all the durations.
    pub median: Duration,
    /// Variance of all the durations.
    pub variance: Duration,
    /// Minimum duration amongst all durations.
    pub min: Duration,
    /// Maximum duration amongst all durations.
    pub max: Duration,
}
109
110impl BenchmarkComputations {
111    /// Compute duration values and return a BenchmarkComputations struct
112    pub fn new(durations: &BenchmarkDurations) -> Self {
113        let mean = durations.mean_duration();
114        let (min, max, median) = durations.min_max_median_durations();
115        Self {
116            mean,
117            median,
118            min,
119            max,
120            variance: durations.variance_duration(mean),
121        }
122    }
123}
124
/// Benchmark trait.
pub trait Benchmark {
    /// Benchmark input arguments.
    type Input: Clone;
    /// The benchmark output.
    type Output;

    /// Prepare the benchmark, run anything that is essential for the benchmark, but shouldn't
    /// count as included in the duration.
    ///
    /// # Notes
    ///
    /// This should not include warmup, the benchmark will be run at least one time without
    /// measuring the execution time.
    fn prepare(&self) -> Self::Input;

    /// Execute the benchmark and return the logical output of the task executed.
    ///
    /// It is important to return the output since otherwise dead-code elimination might optimize
    /// away code that should be benchmarked.
    fn execute(&self, input: Self::Input) -> Result<Self::Output, String>;

    /// Number of samples per run required to have a statistical significance.
    ///
    /// With `std`, this can be overridden via the `BENCH_NUM_SAMPLES` environment
    /// variable; unset or unparsable values silently fall back to the default of 15.
    fn num_samples(&self) -> usize {
        const DEFAULT: usize = 15;
        #[cfg(feature = "std")]
        {
            std::env::var("BENCH_NUM_SAMPLES")
                .map(|val| str::parse::<usize>(&val).unwrap_or(DEFAULT))
                .unwrap_or(DEFAULT)
        }

        #[cfg(not(feature = "std"))]
        {
            DEFAULT
        }
    }

    /// Name of the benchmark, should be short and it should match the name
    /// defined in the crate Cargo.toml
    fn name(&self) -> String;

    /// The options passed to the benchmark.
    fn options(&self) -> Option<String> {
        None
    }

    /// Shapes dimensions
    fn shapes(&self) -> Vec<Vec<usize>> {
        vec![]
    }

    /// Wait for computation to complete.
    fn sync(&self);

    /// Start measuring the computation duration.
    ///
    /// By default this falls back to [`Self::profile_full`]; implementations
    /// may override it (presumably to use device-side timing — the `Device`
    /// timing method routes here, see `run`).
    #[cfg(feature = "std")]
    fn profile(&self, args: Self::Input) -> Result<ProfileDuration, String> {
        self.profile_full(args)
    }

    /// Start measuring the computation duration. Use the full duration regardless of whether
    /// device duration is available or not.
    #[cfg(feature = "std")]
    fn profile_full(&self, args: Self::Input) -> Result<ProfileDuration, String> {
        self.sync();
        let start_time = Instant::now();
        let out = self.execute(args)?;
        self.sync();
        // Keep the output alive until after the final sync so the executed
        // work cannot be optimized away; drop it before taking the end time.
        core::mem::drop(out);
        Ok(ProfileDuration::new_system_time(start_time, Instant::now()))
    }

    /// Run the benchmark a number of times.
    ///
    /// # Panics
    ///
    /// Panics when invoked in a `no_std` build, where timing is unavailable.
    #[allow(unused_variables)]
    fn run(&self, timing_method: TimingMethod) -> Result<BenchmarkDurations, String> {
        #[cfg(not(feature = "std"))]
        panic!("Attempting to run benchmark in a no-std environment");

        #[cfg(feature = "std")]
        {
            // Profile one execution with the requested timing method and
            // block until the measured ticks are resolved.
            let execute = |args: &Self::Input| {
                let profile: Result<ProfileDuration, String> = match timing_method {
                    TimingMethod::System => self.profile_full(args.clone()),
                    TimingMethod::Device => self.profile(args.clone()),
                };
                let profile = match profile {
                    Ok(val) => val,
                    Err(err) => return Err(err),
                };
                Ok(crate::future::block_on(profile.resolve()))
            };
            let args = self.prepare();

            // Triggers JIT-compilation and performs a warmup.
            //
            // We are using 5 iterations, where the first one probably triggers the JIT-compilation
            // and it is then followed by 4 warmup executions. Errors are
            // deliberately ignored here; only measured runs report failures.
            for _ in 0..5 {
                let _duration: Result<crate::profile::ProfileTicks, _> = execute(&args);
            }

            // Real execution: collect `num_samples()` measured durations.
            let mut durations = Vec::with_capacity(self.num_samples());
            for _ in 0..self.num_samples() {
                match execute(&args) {
                    Ok(val) => durations.push(val.duration()),
                    Err(err) => {
                        return Err(err);
                    }
                }
            }

            Ok(BenchmarkDurations {
                timing_method,
                durations,
            })
        }
    }
}
245
/// Result of a benchmark run, with metadata.
#[derive(Clone)]
pub struct BenchmarkResult {
    /// Individual raw results of the run.
    pub raw: BenchmarkDurations,
    /// Computed values for the run.
    pub computed: BenchmarkComputations,
    /// Git commit hash of the commit in which the run occurred.
    pub git_hash: String,
    /// Name of the benchmark.
    pub name: String,
    /// Options passed to the benchmark.
    pub options: Option<String>,
    /// Shape dimensions.
    pub shapes: Vec<Vec<usize>>,
    /// Time just before the run, in milliseconds since the Unix epoch
    /// (as populated by `run_benchmark`).
    pub timestamp: u128,
}
264
265impl Display for BenchmarkResult {
266    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
267        f.write_str(
268            format!(
269                "
270        Timestamp: {}
271        Git Hash: {}
272        Benchmarking - {}{}
273        ",
274                self.timestamp, self.git_hash, self.name, self.raw
275            )
276            .as_str(),
277        )
278    }
279}
280
#[cfg(feature = "std")]
/// Runs the given benchmark on the device and prints result and information.
///
/// The git hash is looked up best-effort: when `git` is not installed, the
/// current directory is not a repository, or the output is not valid UTF-8,
/// the hash falls back to `"unknown"` instead of panicking.
pub fn run_benchmark<BM>(benchmark: BM) -> Result<BenchmarkResult, String>
where
    BM: Benchmark,
{
    // Timestamp taken just before the run, in milliseconds since the Unix epoch.
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock should not be set before the Unix epoch")
        .as_millis();
    let git_hash = std::process::Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| String::from_utf8(output.stdout).ok())
        .map(|hash| hash.trim().to_string())
        .unwrap_or_else(|| String::from("unknown"));
    let durations = benchmark.run(TimingMethod::System)?;

    Ok(BenchmarkResult {
        // Compute the statistics first so `durations` can then be moved into
        // `raw` without the clone the previous version needed.
        computed: BenchmarkComputations::new(&durations),
        raw: durations,
        git_hash,
        name: benchmark.name(),
        options: benchmark.options(),
        shapes: benchmark.shapes(),
        timestamp,
    })
}
308
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    // NOTE: the original test names were swapped — the "even" test used five
    // samples and the "odd" test used four. Names now match their sample counts.

    #[test]
    fn test_min_max_median_durations_odd_number_of_samples() {
        // Five samples: the median is the exact middle element.
        let durations = BenchmarkDurations {
            timing_method: TimingMethod::System,
            durations: vec![
                Duration::new(10, 0),
                Duration::new(20, 0),
                Duration::new(30, 0),
                Duration::new(40, 0),
                Duration::new(50, 0),
            ],
        };
        let (min, max, median) = durations.min_max_median_durations();
        assert_eq!(min, Duration::from_secs(10));
        assert_eq!(max, Duration::from_secs(50));
        assert_eq!(median, Duration::from_secs(30));
    }

    #[test]
    fn test_min_max_median_durations_even_number_of_samples() {
        // Four samples: the implementation picks the upper of the two middle
        // elements rather than averaging them.
        let durations = BenchmarkDurations {
            timing_method: TimingMethod::System,
            durations: vec![
                Duration::new(18, 5),
                Duration::new(20, 0),
                Duration::new(30, 0),
                Duration::new(40, 0),
            ],
        };
        let (min, max, median) = durations.min_max_median_durations();
        assert_eq!(min, Duration::from_nanos(18000000005_u64));
        assert_eq!(max, Duration::from_secs(40));
        assert_eq!(median, Duration::from_secs(30));
    }

    #[test]
    fn test_mean_duration() {
        let durations = BenchmarkDurations {
            timing_method: TimingMethod::System,
            durations: vec![
                Duration::new(10, 0),
                Duration::new(20, 0),
                Duration::new(30, 0),
                Duration::new(40, 0),
            ],
        };
        let mean = durations.mean_duration();
        assert_eq!(mean, Duration::from_secs(25));
    }

    #[test]
    fn test_variance_duration() {
        let durations = BenchmarkDurations {
            timing_method: TimingMethod::System,
            durations: vec![
                Duration::new(10, 0),
                Duration::new(20, 0),
                Duration::new(30, 0),
                Duration::new(40, 0),
                Duration::new(50, 0),
            ],
        };
        let mean = durations.mean_duration();
        let variance = durations.variance_duration(mean);
        assert_eq!(variance, Duration::from_secs(200));
    }
}