use std::time::Instant;
/// CPU-side wall-clock timing result for a compute dispatch.
///
/// A value either carries a measurement in nanoseconds or is empty
/// (`elapsed_ns == None`), which is also what [`Default`] produces.
#[derive(Debug, Clone, Default)]
pub struct ComputeDispatchTimer {
    /// Measured duration in nanoseconds, or `None` when nothing was recorded.
    pub elapsed_ns: Option<u64>,
}

impl ComputeDispatchTimer {
    /// Creates a timer that holds no measurement.
    pub fn new() -> Self {
        Self::default()
    }

    /// Captures the current [`Instant`]; hand it to [`Self::stop_cpu`] to
    /// finish the measurement.
    pub fn start_cpu() -> Instant {
        Instant::now()
    }

    /// Builds a timer holding the time elapsed since `start`.
    ///
    /// `Duration::as_nanos` returns a `u128`. The value is clamped to
    /// `u64::MAX` instead of being wrapped by a truncating cast.
    pub fn stop_cpu(start: Instant) -> Self {
        let nanos = start.elapsed().as_nanos();
        // Lossless after the clamp: the value is guaranteed to fit in a u64.
        let clamped = nanos.min(u128::from(u64::MAX)) as u64;
        Self {
            elapsed_ns: Some(clamped),
        }
    }

    /// Returns the measurement in milliseconds, or `None` if there is none.
    pub fn elapsed_ms(&self) -> Option<f64> {
        self.elapsed_ns.map(|ns| ns as f64 / 1_000_000.0)
    }

    /// Returns the measurement in microseconds, or `None` if there is none.
    pub fn elapsed_us(&self) -> Option<f64> {
        self.elapsed_ns.map(|ns| ns as f64 / 1_000.0)
    }

    /// Reports whether a measurement has been recorded.
    pub fn has_measurement(&self) -> bool {
        self.elapsed_ns.is_some()
    }
}
/// A start/end timestamp pair expressed in nanoseconds.
///
/// Only compiled when the `wgpu-backend` feature is enabled.
#[cfg(feature = "wgpu-backend")]
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuTimestamp {
    /// Timestamp taken at the beginning of the span, in nanoseconds.
    pub start_ns: u64,
    /// Timestamp taken at the end of the span, in nanoseconds.
    pub end_ns: u64,
}

#[cfg(feature = "wgpu-backend")]
impl GpuTimestamp {
    /// Bundles a start and end timestamp (both in nanoseconds).
    pub fn new(start_ns: u64, end_ns: u64) -> Self {
        GpuTimestamp { start_ns, end_ns }
    }

    /// Nanoseconds between start and end; yields 0 when `end_ns` precedes
    /// `start_ns` instead of underflowing.
    pub fn elapsed_ns(&self) -> u64 {
        let GpuTimestamp { start_ns, end_ns } = *self;
        u64::saturating_sub(end_ns, start_ns)
    }

    /// Same span as [`Self::elapsed_ns`], converted to milliseconds.
    pub fn elapsed_ms(&self) -> f64 {
        const NS_PER_MS: f64 = 1_000_000.0;
        let span_ns = self.elapsed_ns();
        span_ns as f64 / NS_PER_MS
    }
}
/// Computes a 1-D dispatch size `[x, 1, 1]` covering `n_items` work items
/// with workgroups of `workgroup_size` invocations each.
///
/// * `x` is `ceil(n_items / workgroup_size)`; zero items yield `[0, 1, 1]`.
/// * A `workgroup_size` of 0 is treated as 1 to avoid dividing by zero.
/// * If the group count does not fit in a `u32` it saturates at `u32::MAX`
///   rather than wrapping to a small (possibly zero) value.
///
/// No device dispatch limits are checked here.
pub fn dispatch_count_for(n_items: usize, workgroup_size: u32) -> [u32; 3] {
    let group_size = u64::from(workgroup_size.max(1));
    // Widen before dividing: casting `n_items` straight to u32 would silently
    // drop the high bits for counts above u32::MAX.
    let groups = (n_items as u64).div_ceil(group_size);
    // Lossless after the clamp: the value is guaranteed to fit in a u32.
    let x = groups.min(u64::from(u32::MAX)) as u32;
    [x, 1, 1]
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Runs a start/stop cycle with a 1 ms sleep in between so the recorded
    /// duration is expected to be non-zero.
    fn measure_short_sleep() -> ComputeDispatchTimer {
        let begin = ComputeDispatchTimer::start_cpu();
        std::thread::sleep(Duration::from_millis(1));
        ComputeDispatchTimer::stop_cpu(begin)
    }

    /// A freshly constructed timer carries no measurement.
    #[test]
    fn test_cpu_timer_new_has_no_measurement() {
        let fresh = ComputeDispatchTimer::new();
        assert!(!fresh.has_measurement());
        assert!(fresh.elapsed_ms().is_none());
    }

    /// Stopping a timer stores a positive nanosecond count.
    #[test]
    fn test_cpu_timer_stop_records_elapsed() {
        let measured = measure_short_sleep();
        assert!(measured.has_measurement());
        let ns = measured.elapsed_ns.unwrap();
        assert!(ns > 0, "elapsed_ns should be > 0, got {ns}");
    }

    /// The millisecond view of a real measurement is positive.
    #[test]
    fn test_cpu_timer_elapsed_ms_positive() {
        let measured = measure_short_sleep();
        let ms = measured.elapsed_ms().unwrap();
        assert!(ms > 0.0, "elapsed_ms should be positive, got {ms}");
    }

    /// Zero items produce zero workgroups along x.
    #[test]
    fn test_dispatch_count_for_zero() {
        let dims = dispatch_count_for(0, 64);
        assert_eq!(dims, [0, 1, 1]);
    }

    /// An item count equal to the workgroup size needs exactly one group.
    #[test]
    fn test_dispatch_count_for_exact() {
        let dims = dispatch_count_for(64, 64);
        assert_eq!(dims, [1, 1, 1]);
    }

    /// One item past a full workgroup spills into a second group.
    #[test]
    fn test_dispatch_count_for_overflow() {
        let dims = dispatch_count_for(65, 64);
        assert_eq!(dims, [2, 1, 1]);
    }

    /// A single item still needs one group.
    #[test]
    fn test_dispatch_count_for_one() {
        let dims = dispatch_count_for(1, 64);
        assert_eq!(dims, [1, 1, 1]);
    }

    /// Elapsed time is the end/start difference, in both ns and ms.
    #[cfg(feature = "wgpu-backend")]
    #[test]
    fn test_gpu_timestamp_elapsed() {
        let stamp = GpuTimestamp::new(1000, 5000);
        assert_eq!(stamp.elapsed_ns(), 4000);
        let delta_ms = stamp.elapsed_ms() - 0.004;
        assert!(delta_ms.abs() < 1e-9);
    }

    /// An end before the start clamps to zero instead of underflowing.
    #[cfg(feature = "wgpu-backend")]
    #[test]
    fn test_gpu_timestamp_saturating_sub() {
        let reversed = GpuTimestamp::new(5000, 1000);
        assert_eq!(reversed.elapsed_ns(), 0);
    }
}