Skip to main content

oxiphysics_gpu/compute/
timestamp.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! CPU-side dispatch timing utilities for the GPU compute layer.
5//!
6//! [`ComputeDispatchTimer`] measures wall-clock time around a compute dispatch
7//! using [`std::time::Instant`].  On platforms where GPU timestamp queries are
8//! available (via the `wgpu-backend` feature), the companion [`GpuTimestamp`]
9//! type can record hardware-level start/end values from a timestamp query set.
10
11use std::time::Instant;
12
/// Wall-clock timer for a single compute dispatch.
///
/// The measurement is taken on the CPU with [`Instant`]. A value of this type
/// either carries an elapsed duration in nanoseconds or `None` when nothing
/// has been recorded yet.
///
/// # Examples
///
/// ```
/// use oxiphysics_gpu::compute::timestamp::ComputeDispatchTimer;
///
/// let start = ComputeDispatchTimer::start_cpu();
/// // ... do work ...
/// let timer = ComputeDispatchTimer::stop_cpu(start);
/// if let Some(ms) = timer.elapsed_ms() {
///     println!("dispatch took {ms:.3} ms");
/// }
/// ```
#[derive(Debug, Clone, Default)]
pub struct ComputeDispatchTimer {
    /// Elapsed time in nanoseconds; `None` until a measurement is produced by
    /// [`stop_cpu`](Self::stop_cpu).
    pub elapsed_ns: Option<u64>,
}

impl ComputeDispatchTimer {
    /// Create a timer that holds no measurement (`elapsed_ns` is `None`).
    ///
    /// Equivalent to [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Capture the current instant as the start of a timed region.
    ///
    /// Hand the returned [`Instant`] to [`stop_cpu`](Self::stop_cpu) once the
    /// region has finished.
    pub fn start_cpu() -> Instant {
        Instant::now()
    }

    /// Finish a timed region and return a timer holding the time elapsed
    /// since `start`.
    ///
    /// The duration is stored as whole nanoseconds. Should it ever exceed
    /// `u64::MAX` nanoseconds, the stored value saturates at `u64::MAX`
    /// instead of silently wrapping around.
    pub fn stop_cpu(start: Instant) -> Self {
        // `Duration::as_nanos` yields a `u128`; a bare `as u64` would truncate
        // the high bits, so convert with an explicit saturating fallback.
        let nanos = u64::try_from(start.elapsed().as_nanos()).unwrap_or(u64::MAX);
        Self {
            elapsed_ns: Some(nanos),
        }
    }

    /// Elapsed time in milliseconds, or `None` if no measurement is stored.
    pub fn elapsed_ms(&self) -> Option<f64> {
        self.elapsed_ns.map(|ns| ns as f64 / 1_000_000.0)
    }

    /// Elapsed time in microseconds, or `None` if no measurement is stored.
    pub fn elapsed_us(&self) -> Option<f64> {
        self.elapsed_ns.map(|ns| ns as f64 / 1_000.0)
    }

    /// Return `true` when a measurement is stored.
    pub fn has_measurement(&self) -> bool {
        self.elapsed_ns.is_some()
    }
}
68
/// Start/end pair of GPU hardware timestamps, compiled only with the
/// `wgpu-backend` feature.
///
/// In production use, both values are read back from a `wgpu::QuerySet` of
/// type `Timestamp` once the GPU has signalled completion.  They are expected
/// to already be in nanoseconds, i.e. scaled by the adapter's timestamp
/// period.
#[cfg(feature = "wgpu-backend")]
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuTimestamp {
    /// GPU timestamp taken when the pass began (nanoseconds).
    pub start_ns: u64,
    /// GPU timestamp taken when the pass finished (nanoseconds).
    pub end_ns: u64,
}

#[cfg(feature = "wgpu-backend")]
impl GpuTimestamp {
    /// Build a timestamp pair from its start and end values.
    pub fn new(start_ns: u64, end_ns: u64) -> Self {
        GpuTimestamp { start_ns, end_ns }
    }

    /// GPU time between start and end, in nanoseconds.
    ///
    /// The subtraction saturates, so a pair whose end precedes its start
    /// yields `0` rather than panicking or wrapping.
    pub fn elapsed_ns(&self) -> u64 {
        let Self { start_ns, end_ns } = *self;
        end_ns.saturating_sub(start_ns)
    }

    /// GPU time between start and end, in milliseconds.
    pub fn elapsed_ms(&self) -> f64 {
        const NANOS_PER_MILLI: f64 = 1_000_000.0;
        let nanos = self.elapsed_ns();
        nanos as f64 / NANOS_PER_MILLI
    }
}
100
/// Compute the number of workgroups needed to cover `n_items` in the X dimension.
///
/// The result is `[ceil(n_items / workgroup_size), 1, 1]`.
///
/// * `n_items == 0` yields `[0, 1, 1]`, i.e. a no-op dispatch, without
///   panicking.
/// * `workgroup_size == 0` is treated as `1` so the division can never trap.
/// * If the required count does not fit in a `u32`, the X component saturates
///   at `u32::MAX` instead of wrapping to a small (possibly zero) value.
///   NOTE(review): callers presumably still need to validate the count
///   against the device's per-dimension dispatch limit — confirm at call sites.
///
/// This is a pure helper used by both the feature-gated real backend and any
/// CPU-side utilities that need to replicate the same dispatch sizing logic.
///
/// # Examples
///
/// ```
/// use oxiphysics_gpu::compute::timestamp::dispatch_count_for;
///
/// assert_eq!(dispatch_count_for(0, 64), [0, 1, 1]);
/// assert_eq!(dispatch_count_for(64, 64), [1, 1, 1]);
/// assert_eq!(dispatch_count_for(65, 64), [2, 1, 1]);
/// ```
pub fn dispatch_count_for(n_items: usize, workgroup_size: u32) -> [u32; 3] {
    // Divide in `usize` so that item counts above `u32::MAX` are not truncated
    // before the division. If the group size itself does not fit in `usize`
    // (narrow targets only), saturating it still yields the correct count.
    let group_size = usize::try_from(workgroup_size.max(1)).unwrap_or(usize::MAX);

    // Zero items naturally produce zero groups here.
    let groups = n_items.div_ceil(group_size);

    [u32::try_from(groups).unwrap_or(u32::MAX), 1, 1]
}
126
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Time a 1 ms sleep so that the recorded duration should be non-zero.
    fn time_short_sleep() -> ComputeDispatchTimer {
        let begin = ComputeDispatchTimer::start_cpu();
        std::thread::sleep(Duration::from_millis(1));
        ComputeDispatchTimer::stop_cpu(begin)
    }

    // --- ComputeDispatchTimer -------------------------------------------

    #[test]
    fn test_cpu_timer_new_has_no_measurement() {
        let fresh = ComputeDispatchTimer::new();
        assert!(!fresh.has_measurement());
        assert!(fresh.elapsed_ms().is_none());
    }

    #[test]
    fn test_cpu_timer_stop_records_elapsed() {
        let timer = time_short_sleep();
        assert!(timer.has_measurement());
        let ns = timer.elapsed_ns.unwrap();
        assert!(ns > 0, "elapsed_ns should be > 0, got {ns}");
    }

    #[test]
    fn test_cpu_timer_elapsed_ms_positive() {
        let ms = time_short_sleep().elapsed_ms().unwrap();
        assert!(ms > 0.0, "elapsed_ms should be positive, got {ms}");
    }

    // --- dispatch_count_for ---------------------------------------------

    #[test]
    fn test_dispatch_count_for_zero() {
        let groups = dispatch_count_for(0, 64);
        assert_eq!(groups, [0, 1, 1]);
    }

    #[test]
    fn test_dispatch_count_for_exact() {
        let groups = dispatch_count_for(64, 64);
        assert_eq!(groups, [1, 1, 1]);
    }

    #[test]
    fn test_dispatch_count_for_overflow() {
        // One item past a full workgroup spills into a second group.
        let groups = dispatch_count_for(65, 64);
        assert_eq!(groups, [2, 1, 1]);
    }

    #[test]
    fn test_dispatch_count_for_one() {
        let groups = dispatch_count_for(1, 64);
        assert_eq!(groups, [1, 1, 1]);
    }

    // --- GpuTimestamp (feature-gated) -----------------------------------

    #[cfg(feature = "wgpu-backend")]
    #[test]
    fn test_gpu_timestamp_elapsed() {
        let stamp = GpuTimestamp::new(1000, 5000);
        assert_eq!(stamp.elapsed_ns(), 4000);
        assert!((stamp.elapsed_ms() - 0.004).abs() < 1e-9);
    }

    #[cfg(feature = "wgpu-backend")]
    #[test]
    fn test_gpu_timestamp_saturating_sub() {
        // A start value later than the end value must clamp to zero, not panic.
        let stamp = GpuTimestamp::new(5000, 1000);
        assert_eq!(stamp.elapsed_ns(), 0);
    }
}