// fluxbench_core/measure.rs
1//! High-Precision Timing
2//!
3//! Uses RDTSCP on x86_64 and CNTVCT_EL0 on AArch64 for minimal overhead
4//! cycle counting, with fallback to std::time::Instant on other platforms.
5
6use std::time::Duration;
7
8// ─── Inline cycle counter helpers ────────────────────────────────────────────
9
/// Read the CPU cycle/tick counter (platform-specific).
///
/// x86_64: executes `RDTSCP` and returns the 64-bit time-stamp counter.
/// On modern CPUs the TSC ticks at a constant ("invariant") rate, so this
/// behaves as a fine-grained tick count rather than a true retired-cycle
/// count — presumably acceptable for benchmarking; confirm if exact cycle
/// semantics matter.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn read_cycles() -> u64 {
    // SAFETY: RDTSCP is available on all x86_64 CPUs since ~2006.
    // Unlike plain RDTSC it waits for all prior instructions to complete
    // before sampling the counter; note it is NOT a fully serializing
    // instruction — later instructions may still begin before the read.
    unsafe {
        // RDTSCP also writes IA32_TSC_AUX (typically a CPU/node id) into
        // this out-parameter; only the tick count is wanted, so discard it.
        let mut _aux: u32 = 0;
        std::arch::x86_64::__rdtscp(&mut _aux)
    }
}
22
/// Read the virtual counter timer on AArch64 (comparable to x86 TSC).
///
/// Reads `CNTVCT_EL0`, the generic timer's virtual count. It ticks at the
/// fixed system counter frequency rather than the CPU clock, so values are
/// timer ticks, not true core cycles.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn read_cycles() -> u64 {
    let cnt: u64;
    // SAFETY: CNTVCT_EL0 is accessible from EL0 (userspace) on all
    // AArch64 implementations. It provides a monotonically increasing
    // counter at a fixed frequency (typically the system timer frequency).
    // `nostack`/`nomem` are sound: a single MRS touches neither memory
    // nor the stack.
    unsafe {
        std::arch::asm!("mrs {}, cntvct_el0", out(reg) cnt, options(nostack, nomem));
    }
    cnt
}
36
/// Fallback for platforms without an accessible cycle counter.
///
/// Always returns 0; callers can detect this case via `HAS_CYCLE_COUNTER`
/// and rely on wall-clock timing only.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline(always)]
fn read_cycles() -> u64 {
    0
}
42
/// Whether this platform provides real cycle counters.
///
/// `true` on x86_64 (RDTSCP) and AArch64 (CNTVCT_EL0); on all other
/// targets `read_cycles` returns a constant 0.
pub const HAS_CYCLE_COUNTER: bool =
    cfg!(any(target_arch = "x86_64", target_arch = "aarch64"));
45
46// ─── Instant ─────────────────────────────────────────────────────────────────
47
/// High-precision instant for benchmarking
#[derive(Debug, Clone, Copy)]
pub struct Instant {
    // Wall-clock anchor used by `elapsed()`.
    instant: std::time::Instant,
    // Raw cycle/tick stamp taken at capture time; 0 on platforms without
    // a cycle counter (see `HAS_CYCLE_COUNTER`).
    tsc: u64,
}
54
55impl Instant {
56    /// Capture current instant
57    #[inline(always)]
58    pub fn now() -> Self {
59        let tsc = read_cycles();
60        Self {
61            instant: std::time::Instant::now(),
62            tsc,
63        }
64    }
65
66    /// Compute elapsed time since this instant
67    #[inline(always)]
68    pub fn elapsed(&self) -> Duration {
69        self.instant.elapsed()
70    }
71
72    /// Raw cycle/tick count (non-zero on x86_64 and aarch64)
73    #[inline(always)]
74    pub fn cycles(&self) -> u64 {
75        self.tsc
76    }
77}
78
79// ─── Timer ───────────────────────────────────────────────────────────────────
80
81/// Timer for measuring benchmark iterations
82pub struct Timer {
83    start: Instant,
84    cycles_start: u64,
85}
86
87impl Timer {
88    /// Start a new timer
89    #[inline(always)]
90    pub fn start() -> Self {
91        let cycles_start = read_cycles();
92        Self {
93            start: Instant::now(),
94            cycles_start,
95        }
96    }
97
98    /// Stop the timer and return elapsed nanoseconds and cycles
99    #[inline(always)]
100    pub fn stop(&self) -> (u64, u64) {
101        let elapsed = self.start.elapsed();
102        let nanos = elapsed.as_nanos() as u64;
103        let cycles = read_cycles().saturating_sub(self.cycles_start);
104        (nanos, cycles)
105    }
106}
107
/// Set CPU affinity to pin the current thread to a specific core
///
/// This improves TSC stability by avoiding core migrations.
///
/// # Errors
///
/// Returns the OS error from `sched_setaffinity(2)` on failure (e.g.
/// `EINVAL` when `cpu` does not name a usable CPU).
#[cfg(target_os = "linux")]
pub fn pin_to_cpu(cpu: usize) -> Result<(), std::io::Error> {
    use std::mem::MaybeUninit;

    unsafe {
        // SAFETY: an all-zero bit pattern is a valid `cpu_set_t` (it is a
        // plain bit mask), so treating the zeroed value as initialized is
        // sound.
        let mut set = MaybeUninit::<libc::cpu_set_t>::zeroed();
        let set_ref = set.assume_init_mut();

        // CPU_ZERO is redundant after `zeroed()` but harmless; it keeps the
        // "start from an empty set" intent explicit.
        libc::CPU_ZERO(set_ref);
        // NOTE(review): libc's CPU_SET appears to silently ignore
        // cpu >= CPU_SETSIZE, which would then fail below with EINVAL —
        // confirm against the libc crate if huge indices are possible.
        libc::CPU_SET(cpu, set_ref);

        // pid 0 means "the calling thread" for sched_setaffinity.
        let result = libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), set_ref);

        if result == 0 {
            Ok(())
        } else {
            Err(std::io::Error::last_os_error())
        }
    }
}
131
/// No-op fallback for platforms without `sched_setaffinity` support.
///
/// Always succeeds, so callers may request pinning unconditionally;
/// measurement simply proceeds without affinity here.
#[cfg(not(target_os = "linux"))]
pub fn pin_to_cpu(_cpu: usize) -> Result<(), std::io::Error> {
    // CPU pinning not supported on this platform
    Ok(())
}
137
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_instant_elapsed() {
        let begin = Instant::now();
        std::thread::sleep(Duration::from_millis(10));
        let elapsed = begin.elapsed();

        // The sleep is 10ms, but allow generous slop in both directions
        // for coarse timers and scheduler delay: accept [5ms, 100ms).
        assert!(elapsed >= Duration::from_millis(5));
        assert!(elapsed < Duration::from_millis(100));
    }

    #[test]
    fn test_timer() {
        let timer = Timer::start();
        std::thread::sleep(Duration::from_millis(10));
        let (nanos, _cycles) = timer.stop();

        // A 10ms sleep must register at least 5ms of wall time in nanos.
        assert!(nanos >= 5_000_000);
    }

    #[test]
    fn test_cycle_counter() {
        if HAS_CYCLE_COUNTER {
            // Tuple elements evaluate left to right, so `first` is
            // sampled strictly before `second`.
            let (first, second) = (read_cycles(), read_cycles());
            assert!(second >= first, "cycle counter should be monotonic");
        }
    }
}