criterion_cuda/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use criterion::{
4    measurement::{Measurement, ValueFormatter},
5    Throughput,
6};
7use cust::prelude::{Stream, StreamFlags};
8use once_cell::sync::Lazy;
9
/// `CudaTime` measures the time of one or multiple CUDA kernels via CUDA events.
///
/// A start event and an end event are recorded on [`MEASUREMENT_STREAM`]; the
/// measured value is the elapsed time between the two, reported in milliseconds.
#[derive(Debug, Clone, Copy, Default)]
pub struct CudaTime;
12
13pub static MEASUREMENT_STREAM: Lazy<Stream> = Lazy::new(|| {
14    Stream::new(StreamFlags::DEFAULT, None).unwrap()
15});
16
17impl Measurement for CudaTime {
18    type Intermediate = cust::event::Event;
19    type Value = f32;
20
21    fn start(&self) -> Self::Intermediate {
22        let event = cust::event::Event::new(cust::event::EventFlags::DEFAULT)
23            .expect("Failed to create event");
24        event
25            .record(&MEASUREMENT_STREAM)
26            .expect("Could not record CUDA event");
27        event
28    }
29
30    fn end(&self, start_event: Self::Intermediate) -> Self::Value {
31        let end_event = cust::event::Event::new(cust::event::EventFlags::DEFAULT)
32            .expect("Failed to create event");
33        end_event
34            .record(&MEASUREMENT_STREAM)
35            .expect("Could not record CUDA event");
36        end_event.synchronize().expect("Failed to synchronize");
37        end_event
38            .elapsed_time_f32(&start_event)
39            .expect("Failed to measure time")
40    }
41
42    fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
43        v1 + v2
44    }
45
46    fn zero(&self) -> Self::Value {
47        0f32
48    }
49
50    fn to_f64(&self, value: &Self::Value) -> f64 {
51        *value as f64
52    }
53
54    fn formatter(&self) -> &dyn ValueFormatter {
55        &CudaTimeFormatter
56    }
57}
58
59struct CudaTimeFormatter;
60
61impl ValueFormatter for CudaTimeFormatter {
62    fn format_value(&self, value: f64) -> String {
63        format!("{:.4} ms", value)
64    }
65
66    fn format_throughput(&self, throughput: &Throughput, value: f64) -> String {
67        match throughput {
68            Throughput::Bytes(b) => format!(
69                "{:.4} GiB/s",
70                (*b as f64) / (1024.0 * 1024.0 * 1024.0) / (value * 1e-3)
71            ),
72            Throughput::Elements(b) => format!("{:.4} elements/s", (*b as f64) / (value * 1e-3)),
73        }
74    }
75
76    fn scale_values(&self, _typical_value: f64, _values: &mut [f64]) -> &'static str {
77        "ms"
78    }
79
80    /// TODO!
81    fn scale_throughputs(
82        &self,
83        _typical_value: f64,
84        throughput: &Throughput,
85        _values: &mut [f64],
86    ) -> &'static str {
87        match throughput {
88            Throughput::Bytes(_) => {
89                "GiB/s"
90            }
91            Throughput::Elements(_) => {
92                "elements/s"
93            }
94        }
95    }
96
97    fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str {
98        "ms"
99    }
100}
101
#[cfg(test)]
mod tests {
    /// Smoke test: `cust::quick_init` must succeed in creating a CUDA context.
    // NOTE(review): presumably requires a CUDA-capable device and driver on the
    // machine running the tests — confirm for CI.
    #[test]
    fn init_cuda_test() {
        let init_result = cust::quick_init();
        let _context = init_result.expect("could not create CUDA context");
    }
}
108}