use std::hint::black_box as std_black_box;
use std::time::Instant;
/// Optimization barrier: hands `x` back unchanged while asking the compiler
/// to treat the value as used and unknown.
///
/// Thin re-export of [`std::hint::black_box`] so callers of this module do
/// not need their own import.
#[inline]
pub fn black_box<T>(x: T) -> T {
    std_black_box::<T>(x)
}
/// Reads the platform's tick counter.
///
/// Dispatches at compile time to the architecture-specific reader:
/// `rdtsc_x86_64` on x86_64, `rdtsc_aarch64` on aarch64, and
/// `rdtsc_fallback` everywhere else. Exactly one of the bindings below
/// survives `cfg` evaluation for any given target.
#[inline]
pub fn rdtsc() -> u64 {
    #[cfg(target_arch = "x86_64")]
    let ticks = rdtsc_x86_64();

    #[cfg(target_arch = "aarch64")]
    let ticks = rdtsc_aarch64();

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let ticks = rdtsc_fallback();

    ticks
}
/// Reads the x86_64 time-stamp counter.
///
/// An `lfence` is issued before `rdtsc`, and the two 32-bit halves that
/// `rdtsc` leaves in `rdx`/`rax` are merged into one 64-bit value. Compiler
/// fences on both sides keep the compiler from moving memory accesses across
/// the read.
#[cfg(target_arch = "x86_64")]
#[inline]
fn rdtsc_x86_64() -> u64 {
    use std::arch::asm;
    use std::sync::atomic::{compiler_fence, Ordering};

    compiler_fence(Ordering::SeqCst);

    let tsc: u64;
    // SAFETY: the sequence only writes `rax` and `rdx`, both declared as
    // outputs, and touches neither memory nor the stack. This assumes the
    // OS permits `rdtsc` in user mode -- TODO confirm for exotic targets.
    unsafe {
        asm!(
            "lfence",
            "rdtsc",
            // Merge the high half (rdx) into the low half (rax).
            "shl rdx, 32",
            "or rax, rdx",
            out("rax") tsc,
            out("rdx") _,
            options(nomem, nostack),
        );
    }

    compiler_fence(Ordering::SeqCst);
    tsc
}
/// Reads the aarch64 `cntvct_el0` counter register.
///
/// An `isb` is issued before the `mrs` read, and compiler fences on both
/// sides keep the compiler from moving memory accesses across the read.
/// NOTE(review): this register presumably ticks at a fixed timer frequency
/// rather than the CPU clock -- confirm against the Arm documentation.
#[cfg(target_arch = "aarch64")]
#[inline]
fn rdtsc_aarch64() -> u64 {
    use std::arch::asm;
    use std::sync::atomic::{compiler_fence, Ordering};

    compiler_fence(Ordering::SeqCst);

    let ticks: u64;
    // SAFETY: the sequence only writes the single general-purpose register
    // declared as output and touches neither memory nor the stack. This
    // assumes user-mode reads of `cntvct_el0` are enabled -- TODO confirm.
    unsafe {
        asm!(
            "isb",
            "mrs {}, cntvct_el0",
            out(reg) ticks,
            options(nomem, nostack),
        );
    }

    compiler_fence(Ordering::SeqCst);
    ticks
}
/// Portable stand-in for a hardware counter on other architectures.
///
/// Returns the nanoseconds elapsed since the first call in this process, so
/// one "tick" is one nanosecond. The reference instant is captured lazily
/// and shared by all callers.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[inline]
fn rdtsc_fallback() -> u64 {
    static EPOCH: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();

    let since_epoch = EPOCH.get_or_init(Instant::now).elapsed();
    since_epoch.as_nanos() as u64
}
/// Calibrates the tick counter against the wall clock.
///
/// Takes 100 samples; each one reads [`rdtsc`] and [`Instant`] around a 1 ms
/// sleep and records `ticks / nanoseconds`. The median of the samples is
/// returned, which keeps a few badly scheduled sleeps from skewing the
/// result. Samples whose measured duration is zero are dropped; if none
/// remain, `3.0` is returned as a default.
///
/// This function blocks for at least the combined sleep time.
pub fn cycles_per_ns() -> f64 {
    const SLEEP_PER_SAMPLE: std::time::Duration = std::time::Duration::from_millis(1);
    const SAMPLE_COUNT: usize = 100;

    let mut samples: Vec<f64> = (0..SAMPLE_COUNT)
        .filter_map(|_| {
            let ticks_before = rdtsc();
            let wall_clock = Instant::now();
            std::thread::sleep(SLEEP_PER_SAMPLE);
            let ticks_after = rdtsc();
            let nanos = wall_clock.elapsed().as_nanos() as u64;

            if nanos == 0 {
                // Cannot form a ratio for this sample.
                return None;
            }
            let ticks = ticks_after.saturating_sub(ticks_before);
            Some(ticks as f64 / nanos as f64)
        })
        .collect();

    if samples.is_empty() {
        return 3.0;
    }

    samples.sort_by(f64::total_cmp);
    let upper = samples.len() / 2;
    match samples.len() % 2 {
        // Even count: average the two middle samples.
        0 => (samples[upper - 1] + samples[upper]) / 2.0,
        _ => samples[upper],
    }
}
/// Estimates the smallest interval, in nanoseconds, the counter can resolve.
///
/// * x86_64: one tick period (`1 / cycles_per_ns`), or `1.0` when the rate is
///   not positive.
/// * aarch64: one tick period when the rate is positive and below 0.1 ticks
///   per nanosecond; otherwise the value is measured empirically via
///   `measure_timer_resolution`.
/// * other targets: always measured empirically.
fn estimate_resolution_ns(cycles_per_ns: f64) -> f64 {
    #[cfg(target_arch = "aarch64")]
    let resolution = match cycles_per_ns {
        rate if rate > 0.0 && rate < 0.1 => 1.0 / rate,
        rate => measure_timer_resolution(rate),
    };

    #[cfg(target_arch = "x86_64")]
    let resolution = match cycles_per_ns {
        rate if rate > 0.0 => 1.0 / rate,
        _ => 1.0,
    };

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let resolution = measure_timer_resolution(cycles_per_ns);

    resolution
}
/// Measures the counter's resolution empirically, in nanoseconds.
///
/// Reads [`rdtsc`] twice back-to-back 1000 times and keeps the smallest
/// non-zero difference, then converts it using `cycles_per_ns`. Returns
/// `1.0` when no non-zero difference was observed or when `cycles_per_ns`
/// is zero or negative.
// Dead on x86_64: that target's branch of `estimate_resolution_ns` never calls this.
#[cfg_attr(target_arch = "x86_64", allow(dead_code))]
fn measure_timer_resolution(cycles_per_ns: f64) -> f64 {
    const PROBES: usize = 1000;

    let smallest_step = (0..PROBES).fold(u64::MAX, |best, _| {
        let earlier = rdtsc();
        let later = rdtsc();
        match later.saturating_sub(earlier) {
            // The counter did not advance between the reads; ignore.
            0 => best,
            step => best.min(step),
        }
    });

    if smallest_step == u64::MAX || cycles_per_ns <= 0.0 {
        return 1.0;
    }
    smallest_step as f64 / cycles_per_ns
}
/// Converts tick-counter readings into nanoseconds.
///
/// Holds the ticks-per-nanosecond rate together with the resolution estimate
/// derived from it; both are fixed when the value is constructed.
#[derive(Debug, Clone)]
pub struct Timer {
/// Counter ticks per nanosecond, either calibrated or supplied by the caller.
cycles_per_ns: f64,
/// Estimated smallest measurable interval, in nanoseconds.
resolution_ns: f64,
}
impl Timer {
    /// Builds a timer by calibrating the counter against the wall clock.
    ///
    /// Calibration is delegated to the free function [`cycles_per_ns`], which
    /// sleeps while sampling, so this constructor blocks for that duration.
    pub fn new() -> Self {
        Self::with_cycles_per_ns(cycles_per_ns())
    }

    /// Builds a timer from a known ticks-per-nanosecond rate, skipping
    /// calibration.
    ///
    /// The resolution is still estimated from the supplied rate. The rate is
    /// not validated: a zero, negative or NaN value makes
    /// [`Timer::cycles_to_ns`] return non-finite or negative results.
    pub fn with_cycles_per_ns(cycles_per_ns: f64) -> Self {
        Self {
            cycles_per_ns,
            resolution_ns: estimate_resolution_ns(cycles_per_ns),
        }
    }

    /// Counter ticks per nanosecond used for conversions.
    pub fn cycles_per_ns(&self) -> f64 {
        self.cycles_per_ns
    }

    /// Estimated smallest measurable interval, in nanoseconds.
    pub fn resolution_ns(&self) -> f64 {
        self.resolution_ns
    }

    /// Suggests how many iterations to batch to reach `target_resolution_ns`.
    ///
    /// Returns `1` when the timer's resolution already meets the target.
    /// Otherwise returns `ceil(resolution_ns * 10 / target_resolution_ns)`;
    /// the factor of ten presumably adds headroom over the bare ratio.
    ///
    /// A target of `0.0` yields `usize::MAX` (the float-to-int cast
    /// saturates); a negative or NaN target yields `1`.
    pub fn suggested_iterations(&self, target_resolution_ns: f64) -> usize {
        if self.resolution_ns <= target_resolution_ns {
            1
        } else {
            let multiplier = (self.resolution_ns * 10.0 / target_resolution_ns).ceil() as usize;
            multiplier.max(1)
        }
    }

    /// Runs `f` once and returns the counter ticks it took.
    ///
    /// The closure's result is passed through [`black_box`] so the call is
    /// not optimized away. If the counter appears to go backwards the
    /// difference saturates to `0`.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    #[inline]
    pub fn measure_cycles<F, T>(&self, f: F) -> super::error::MeasurementResult
    where
        F: FnOnce() -> T,
    {
        let start = rdtsc();
        black_box(f());
        let end = rdtsc();
        Ok(end.saturating_sub(start))
    }

    /// Runs `f` once and returns the elapsed time in nanoseconds.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Timer::measure_cycles`].
    #[inline]
    pub fn measure_ns<F, T>(&self, f: F) -> Result<f64, super::error::MeasurementError>
    where
        F: FnOnce() -> T,
    {
        let cycles = self.measure_cycles(f)?;
        Ok(self.cycles_to_ns(cycles))
    }

    /// Converts a tick count to nanoseconds using the stored rate.
    #[inline]
    pub fn cycles_to_ns(&self, cycles: u64) -> f64 {
        cycles as f64 / self.cycles_per_ns
    }

    /// Runs `f` `iterations` times between a single pair of counter reads
    /// and returns the total elapsed ticks (saturating at `0`).
    #[inline]
    fn batched_total_cycles<F, T>(iterations: usize, mut f: F) -> u64
    where
        F: FnMut() -> T,
    {
        let start = rdtsc();
        for _ in 0..iterations {
            black_box(f());
        }
        let end = rdtsc();
        end.saturating_sub(start)
    }

    /// Runs `f` `iterations` times and returns the average ticks per call.
    ///
    /// With `iterations` of `0` or `1` this is a single
    /// [`Timer::measure_cycles`] call (so `f` still runs once). The average
    /// uses integer division and therefore truncates; use
    /// [`Timer::measure_batched_ns`] when sub-tick precision matters.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    #[inline]
    pub fn measure_batched_cycles<F, T>(
        &self,
        iterations: usize,
        f: F,
    ) -> super::error::MeasurementResult
    where
        F: FnMut() -> T,
    {
        if iterations <= 1 {
            return self.measure_cycles(f);
        }
        let total_cycles = Self::batched_total_cycles(iterations, f);
        Ok(total_cycles / iterations as u64)
    }

    /// Runs `f` `iterations` times and returns the average nanoseconds per
    /// call.
    ///
    /// With `iterations` of `0` or `1` this is a single
    /// [`Timer::measure_ns`] call. Otherwise the total tick count is divided
    /// by `iterations` in floating point *before* converting to nanoseconds.
    /// Averaging in integer ticks first would truncate the result (often to
    /// zero on a coarse counter) and throw away exactly the resolution that
    /// batching is meant to recover.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Timer::measure_ns`] on the single-call
    /// path; the batched path always returns `Ok`.
    #[inline]
    pub fn measure_batched_ns<F, T>(
        &self,
        iterations: usize,
        f: F,
    ) -> Result<f64, super::error::MeasurementError>
    where
        F: FnMut() -> T,
    {
        if iterations <= 1 {
            return self.measure_ns(f);
        }
        let total_cycles = Self::batched_total_cycles(iterations, f);
        let cycles_per_iteration = total_cycles as f64 / iterations as f64;
        Ok(cycles_per_iteration / self.cycles_per_ns)
    }
}
impl Default for Timer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Back-to-back reads must not go backwards by more than a small slack.
    #[test]
    fn test_rdtsc_monotonic() {
        let first = rdtsc();
        let second = rdtsc();
        assert!(second >= first || first.saturating_sub(second) < 1000);
    }

    /// The calibrated rate should land in a plausible range.
    #[test]
    fn test_cycles_per_ns_reasonable() {
        let rate = cycles_per_ns();
        let plausible = rate > 0.01 && rate < 10.0;
        assert!(plausible, "cycles_per_ns = {}", rate);
    }

    /// A non-trivial workload must register at least one tick.
    #[test]
    fn test_timer_measure() {
        let timer = Timer::new();
        let workload = || {
            let mut acc = 0u64;
            for value in 0..100_000 {
                acc = acc.wrapping_add(black_box(value));
            }
            black_box(acc)
        };
        let elapsed = timer
            .measure_cycles(workload)
            .expect("measure_cycles should not fail");
        assert!(elapsed > 0, "cycles should be positive, got {}", elapsed);
    }

    /// The estimated resolution should land in a plausible range.
    #[test]
    fn test_timer_resolution_reasonable() {
        let estimate = Timer::new().resolution_ns();
        assert!(
            estimate > 0.1 && estimate < 100.0,
            "resolution_ns = {}",
            estimate
        );
    }

    /// At least one iteration is always suggested; aarch64 is expected to
    /// ask for batching at a 10 ns target.
    #[test]
    fn test_suggested_iterations() {
        let timer = Timer::new();
        let count = timer.suggested_iterations(10.0);
        assert!(count >= 1, "suggested = {}", count);

        #[cfg(target_arch = "aarch64")]
        {
            assert!(
                count > 1,
                "ARM should suggest batching, got {}",
                count
            );
        }
    }

    /// The batched per-call average should not exceed twice a single call.
    #[test]
    fn test_batched_measurement() {
        let timer = Timer::new();

        let one_shot = timer
            .measure_cycles(|| black_box(42))
            .expect("measure_cycles should not fail");
        let averaged = timer
            .measure_batched_cycles(100, || black_box(42))
            .expect("measure_batched_cycles should not fail");

        assert!(
            averaged <= one_shot * 2,
            "single={}, batched={}",
            one_shot,
            averaged
        );
    }
}