#[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
use std::time::Instant;
/// A point-in-time sample used to measure short intervals.
///
/// On Apple aarch64 targets the sample is the raw value returned by
/// `read_cntvct()`; on every other target it wraps [`std::time::Instant`].
#[derive(Copy, Clone, Debug)]
pub struct Tick {
    /// Raw counter value captured when the tick was taken.
    #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
    cycles: u64,
    /// Clock reading captured when the tick was taken.
    #[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
    instant: Instant,
}

impl Tick {
    /// Captures the current time.
    #[inline(always)]
    pub fn now() -> Self {
        #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
        {
            Tick {
                cycles: read_cntvct(),
            }
        }
        #[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
        {
            Tick {
                instant: Instant::now(),
            }
        }
    }

    /// Narrows a nanosecond count to `u64`, clamping at `u64::MAX`.
    ///
    /// A plain `as u64` cast would silently wrap for values that do not fit.
    #[inline(always)]
    fn clamp_ns(nanos: u128) -> u64 {
        // After `min` the value is guaranteed to fit, so the cast is lossless.
        nanos.min(u128::from(u64::MAX)) as u64
    }

    /// Returns the number of nanoseconds from `start` to `self`.
    ///
    /// `self` is the later sample. If `start` is actually later than `self`
    /// the result is `0` on every target; intervals too long to represent
    /// are reported as `u64::MAX`.
    #[inline(always)]
    pub fn elapsed_ns(&self, start: Tick) -> u64 {
        #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
        {
            let ticks = u128::from(self.cycles.saturating_sub(start.cycles));
            let hz = u128::from(cntfrq_hz());
            // Widen to u128 so `ticks * 1e9` cannot overflow before dividing.
            Self::clamp_ns(ticks * 1_000_000_000u128 / hz)
        }
        #[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
        {
            // Saturate explicitly, mirroring `saturating_sub` in the counter
            // branch: `duration_since` is documented as allowed to panic on
            // a reversed pair in future Rust versions.
            let span = self.instant.saturating_duration_since(start.instant);
            Self::clamp_ns(span.as_nanos())
        }
    }

    /// Returns the interval from `start` to `self` in microseconds.
    ///
    /// Derived from [`Tick::elapsed_ns`], so the same saturation rules apply.
    #[inline(always)]
    pub fn elapsed_us(&self, start: Tick) -> f64 {
        self.elapsed_ns(start) as f64 / 1_000.0
    }

    /// Returns the interval from `start` to `self` in milliseconds.
    ///
    /// Derived from [`Tick::elapsed_ns`], so the same saturation rules apply.
    #[inline(always)]
    pub fn elapsed_ms(&self, start: Tick) -> f64 {
        self.elapsed_ns(start) as f64 / 1_000_000.0
    }
}
/// Reads the `cntvct_el0` system register and returns its raw value.
///
/// NOTE(review): no barrier instruction (e.g. `isb`) is issued before the
/// `mrs`, so the read is not ordered against surrounding instructions by
/// this function — confirm whether callers need that.
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
#[inline(always)]
fn read_cntvct() -> u64 {
    let counter: u64;
    // SAFETY: the single `mrs` only writes the output register bound to
    // `counter`; it is declared as touching neither memory nor the stack.
    // Assumes the register is readable from the current privilege level on
    // this target — TODO confirm.
    unsafe {
        std::arch::asm!(
            "mrs {counter}, cntvct_el0",
            counter = out(reg) counter,
            options(nomem, nostack),
        );
    }
    counter
}
/// Returns the value of the `cntfrq_el0` system register, used by callers
/// as the counter frequency in Hz.
///
/// The register is read once; the result is cached in a process-wide
/// `OnceLock` and reused on every later call. A reading of zero is replaced
/// by 24 MHz so callers can safely divide by the result.
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
fn cntfrq_hz() -> u64 {
    use std::sync::OnceLock;

    // Substitute used when the register reports zero.
    const FALLBACK_HZ: u64 = 24_000_000;
    static CACHED_HZ: OnceLock<u64> = OnceLock::new();

    *CACHED_HZ.get_or_init(|| {
        let raw: u64;
        // SAFETY: the single `mrs` only writes the output register bound to
        // `raw`; it is declared as touching neither memory nor the stack.
        unsafe {
            std::arch::asm!(
                "mrs {raw}, cntfrq_el0",
                raw = out(reg) raw,
                options(nomem, nostack),
            );
        }
        match raw {
            0 => FALLBACK_HZ,
            hz => hz,
        }
    })
}
/// Runs `f` once and reports how long the call took.
///
/// Returns the closure's result paired with the nanoseconds elapsed between
/// a [`Tick`] taken immediately before the call and one taken immediately
/// after it.
#[inline(always)]
pub fn time_ns<R>(f: impl FnOnce() -> R) -> (R, u64) {
    let started = Tick::now();
    let output = f();
    // Sample the end time before doing anything else with the result.
    let elapsed = Tick::now().elapsed_ns(started);
    (output, elapsed)
}
/// Owns a scratch byte buffer that [`CacheBuster::thrash`] sweeps through.
///
/// Presumably used to displace previously cached data between benchmark
/// runs (the name suggests this); the code itself only guarantees that the
/// volatile reads and writes described on `thrash` are performed.
pub struct CacheBuster {
    /// Scratch memory touched by `thrash`.
    buf: Vec<u8>,
}

impl CacheBuster {
    /// Distance in bytes between consecutive touches made by `thrash`.
    const STRIDE: usize = 64;
    /// Buffer size used by `new`: 32 MiB.
    const DEFAULT_BYTES: usize = 32 * 1024 * 1024;

    /// Creates a buster with the default 32 MiB zero-filled buffer.
    pub fn new() -> Self {
        Self::with_bytes(Self::DEFAULT_BYTES)
    }

    /// Creates a buster whose zero-filled buffer is exactly `bytes` long.
    pub fn with_bytes(bytes: usize) -> Self {
        CacheBuster {
            buf: vec![0; bytes],
        }
    }

    /// Walks the buffer, touching one byte every 64 bytes.
    ///
    /// Each touched byte is read, added (wrapping) to a running sum, and
    /// overwritten with that sum. Both accesses are volatile so the compiler
    /// cannot elide them. An empty buffer makes this a no-op.
    #[inline(never)]
    pub fn thrash(&mut self) {
        let mut carry = 0u8;
        for slot in self.buf.iter_mut().step_by(Self::STRIDE) {
            let cell: *mut u8 = slot;
            // SAFETY: `cell` was just derived from a live `&mut u8` into
            // `self.buf`, so it is valid, aligned and exclusively borrowed
            // for both the read and the write.
            unsafe {
                carry = carry.wrapping_add(std::ptr::read_volatile(cell));
                std::ptr::write_volatile(cell, carry);
            }
        }
        // Keep the running sum observable so the loop has a visible result.
        std::hint::black_box(carry);
    }
}

impl Default for CacheBuster {
    /// Equivalent to [`CacheBuster::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::sleep;
    use std::time::Duration;

    /// A tick taken after a short sleep reports a non-zero interval
    /// relative to one taken before it.
    #[test]
    fn tick_is_monotonic() {
        let before = Tick::now();
        sleep(Duration::from_micros(50));
        let after = Tick::now();

        let elapsed = after.elapsed_ns(before);
        assert!(elapsed > 0);
    }

    /// The millisecond reading is the nanosecond reading divided by 1e6,
    /// and a 2 ms sleep is measured as at least 1.5 ms.
    #[test]
    fn elapsed_units_agree() {
        let before = Tick::now();
        sleep(Duration::from_millis(2));
        let after = Tick::now();

        let ns = after.elapsed_ns(before);
        assert!(ns >= 1_500_000, "expected >=1.5ms, got {ns}ns");

        let expected_ms = ns as f64 / 1e6;
        let drift = (after.elapsed_ms(before) - expected_ms).abs();
        assert!(drift < 1e-6);
    }
}