#[cfg(target_arch = "x86_64")]
use std::time::{Duration, Instant};
/// Reads the time-stamp counter with a bare `rdtsc` instruction.
///
/// No fencing instructions are issued around the read. The low 32 bits are
/// taken from `eax` and the high 32 bits from `edx`, then merged into a
/// single 64-bit value.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn rdtsc() -> u64 {
    let low: u32;
    let high: u32;
    // SAFETY: the block only writes the two declared output registers and is
    // declared as touching neither the stack nor memory.
    unsafe {
        std::arch::asm!(
            "rdtsc",
            out("eax") low,
            out("edx") high,
            options(nostack, nomem),
        );
    }
    (u64::from(high) << 32) | u64::from(low)
}
/// Reads the time-stamp counter with a bare `rdtscp` instruction.
///
/// The instruction also writes `ecx`; that register is declared as a
/// discarded output and its value is not returned. The low half of the
/// counter comes from `eax`, the high half from `edx`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn rdtscp() -> u64 {
    let low: u32;
    let high: u32;
    // SAFETY: the block only writes the declared output registers and is
    // declared as touching neither the stack nor memory.
    unsafe {
        std::arch::asm!(
            "rdtscp",
            out("eax") low,
            out("edx") high,
            out("ecx") _,
            options(nostack, nomem),
        );
    }
    (u64::from(high) << 32) | u64::from(low)
}
/// Reads the time-stamp counter with `rdtsc` bracketed by an `lfence` on
/// each side.
///
/// `nomem` is not specified for this block, so the compiler has to assume
/// the assembly may access memory.
///
/// NOTE(review): judging by the name this is meant to open a timed region —
/// confirm against callers.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn rdtsc_start() -> u64 {
    let low: u32;
    let high: u32;
    // SAFETY: the block only writes the two declared output registers and
    // does not use the stack.
    unsafe {
        std::arch::asm!(
            "lfence",
            "rdtsc",
            "lfence",
            out("eax") low,
            out("edx") high,
            options(nostack),
        );
    }
    (u64::from(high) << 32) | u64::from(low)
}
/// Reads the time-stamp counter with `rdtscp` followed by an `lfence`.
///
/// `ecx`, which `rdtscp` also writes, is declared as a discarded output.
/// `nomem` is not specified for this block, so the compiler has to assume
/// the assembly may access memory.
///
/// NOTE(review): judging by the name this is meant to close a timed region —
/// confirm against callers.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn rdtsc_end() -> u64 {
    let low: u32;
    let high: u32;
    // SAFETY: the block only writes the declared output registers and does
    // not use the stack.
    unsafe {
        std::arch::asm!(
            "rdtscp",
            "lfence",
            out("eax") low,
            out("edx") high,
            out("ecx") _,
            options(nostack),
        );
    }
    (u64::from(high) << 32) | u64::from(low)
}
/// Returns the number of nanoseconds elapsed since this function was first
/// called.
///
/// The reference `Instant` is captured lazily on the first call and reused by
/// every later call. The nanosecond count is narrowed to `u64` with `as`.
#[cfg(not(target_arch = "x86_64"))]
fn monotonic_nanos() -> u64 {
    use std::{sync::OnceLock, time::Instant};

    static ORIGIN: OnceLock<Instant> = OnceLock::new();

    let origin = ORIGIN.get_or_init(Instant::now);
    let since_origin = origin.elapsed();
    since_origin.as_nanos() as u64
}
/// Fallback used on targets other than x86_64: returns the value of
/// `monotonic_nanos()` in place of a hardware counter read.
#[cfg(not(target_arch = "x86_64"))]
pub fn rdtsc() -> u64 {
monotonic_nanos()
}
/// Fallback used on targets other than x86_64: returns the value of
/// `monotonic_nanos()` in place of a hardware counter read.
#[cfg(not(target_arch = "x86_64"))]
pub fn rdtscp() -> u64 {
monotonic_nanos()
}
/// Fallback used on targets other than x86_64: returns the value of
/// `monotonic_nanos()`. No fencing is performed here.
#[cfg(not(target_arch = "x86_64"))]
pub fn rdtsc_start() -> u64 {
monotonic_nanos()
}
/// Fallback used on targets other than x86_64: returns the value of
/// `monotonic_nanos()`. No fencing is performed here.
#[cfg(not(target_arch = "x86_64"))]
pub fn rdtsc_end() -> u64 {
monotonic_nanos()
}
/// Reports whether the CPU advertises an invariant time-stamp counter.
///
/// The answer is bit 8 of `EDX` from CPUID leaf `0x8000_0007`. That leaf is
/// only read when leaf `0x8000_0000` reports it as supported; otherwise
/// `false` is returned, because querying an unsupported leaf can yield
/// unrelated data that would be misread as a feature bit.
///
/// # Returns
///
/// `true` if the invariant-TSC bit is set, `false` if it is clear or the leaf
/// is not available.
#[cfg(target_arch = "x86_64")]
// `__cpuid` is an `unsafe fn` on older toolchains and a safe one on newer
// toolchains; keeping the `unsafe` blocks compiles cleanly on both.
#[allow(unused_unsafe)]
pub fn has_invariant_tsc() -> bool {
    use std::arch::x86_64::__cpuid;

    /// Leaf whose `EAX` holds the highest supported extended leaf number.
    const MAX_EXTENDED_LEAF_QUERY: u32 = 0x8000_0000;
    /// Leaf whose `EDX` carries the invariant-TSC bit.
    const POWER_MANAGEMENT_LEAF: u32 = 0x8000_0007;
    /// Position of the invariant-TSC flag inside `EDX`.
    const INVARIANT_TSC_BIT: u32 = 1 << 8;

    // SAFETY: the CPUID instruction is available on every x86_64 processor.
    let highest_extended_leaf = unsafe { __cpuid(MAX_EXTENDED_LEAF_QUERY) }.eax;
    if highest_extended_leaf < POWER_MANAGEMENT_LEAF {
        return false;
    }

    // SAFETY: as above; the leaf was just confirmed to be supported.
    let edx = unsafe { __cpuid(POWER_MANAGEMENT_LEAF) }.edx;
    (edx & INVARIANT_TSC_BIT) != 0
}
/// Estimates the counter frequency in ticks per second by timing `rdtsc()`
/// against `Instant`.
///
/// After 100 warm-up reads, five samples are taken. Each sample spins for at
/// least 20 ms of wall-clock time, divides the observed tick delta by the
/// elapsed nanoseconds, and scales the result to one second. The largest
/// estimate wins.
///
/// # Returns
///
/// The highest per-sample estimate. A sample whose end reading is lower than
/// its start reading counts as zero ticks instead of overflowing, so the
/// result is `0` when no sample observed forward progress.
#[cfg(target_arch = "x86_64")]
pub fn calibrate_frequency() -> u64 {
    const WARMUP_READS: usize = 100;
    const SAMPLES: usize = 5;
    const NANOS_PER_SEC: u128 = 1_000_000_000;

    // Loop-invariant: every sample uses the same window length.
    let window = Duration::from_millis(20);

    for _ in 0..WARMUP_READS {
        rdtsc();
    }

    (0..SAMPLES)
        .map(|_| {
            let started = Instant::now();
            let first_tick = rdtsc();
            while started.elapsed() < window {
                std::hint::spin_loop();
            }
            let last_tick = rdtsc();
            // Non-zero: the spin loop only exits once `window` has elapsed.
            let nanos = started.elapsed().as_nanos();

            // A plain subtraction would panic in overflow-checked builds (and
            // wrap to a huge bogus delta otherwise) if the counter reads
            // lower at the end than at the start.
            let ticks = last_tick.saturating_sub(first_tick);
            (u128::from(ticks) * NANOS_PER_SEC / nanos) as u64
        })
        .max()
        .unwrap_or(0)
}
/// Converts a cycle count to nanoseconds using `super::nanos_multiplier()`.
///
/// A non-zero multiplier is applied as a fixed-point factor with 32
/// fractional bits: the product is formed in 128-bit arithmetic and shifted
/// right by 32. A multiplier of `0` selects the fallback `cycles / 3`.
///
/// NOTE(review): the `/ 3` fallback presumably assumes a clock of roughly
/// 3 GHz when no calibration is available — confirm.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn cycles_to_nanos(cycles: u64) -> u64 {
    match super::nanos_multiplier() {
        0 => cycles / 3,
        scale => ((cycles as u128 * scale as u128) >> 32) as u64,
    }
}
/// Variant for targets other than x86_64: returns `cycles` unchanged.
///
/// The non-x86_64 counter functions in this file return the value of
/// `monotonic_nanos()`, so presumably the input is already in nanoseconds —
/// confirm against callers.
#[inline(always)]
#[cfg(not(target_arch = "x86_64"))]
pub fn cycles_to_nanos(cycles: u64) -> u64 {
cycles
}
/// Converts nanoseconds to a cycle count using `super::cycles_multiplier()`.
///
/// A non-zero multiplier is applied as a fixed-point factor with 32
/// fractional bits: the product is formed in 128-bit arithmetic and shifted
/// right by 32. A multiplier of `0` selects the fallback `nanos * 3`.
///
/// Both paths saturate at `u64::MAX` rather than overflowing or truncating,
/// so a very large input (for example `u64::MAX` used as an "infinite"
/// duration) maps to a very large cycle count instead of panicking or
/// wrapping to a small one.
///
/// NOTE(review): the `* 3` fallback presumably assumes a clock of roughly
/// 3 GHz when no calibration is available — confirm.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn nanos_to_cycles(nanos: u64) -> u64 {
    match super::cycles_multiplier() {
        0 => nanos.saturating_mul(3),
        scale => {
            let scaled = (u128::from(nanos) * scale as u128) >> 32;
            // A plain `as u64` would silently drop the high bits.
            u64::try_from(scaled).unwrap_or(u64::MAX)
        }
    }
}
/// Variant for targets other than x86_64: returns `nanos` unchanged.
///
/// The non-x86_64 counter functions in this file return the value of
/// `monotonic_nanos()`, so presumably "cycles" and nanoseconds are the same
/// unit on these targets — confirm against callers.
#[inline(always)]
#[cfg(not(target_arch = "x86_64"))]
pub fn nanos_to_cycles(nanos: u64) -> u64 {
nanos
}
/// Issues an atomic fence with sequentially consistent ordering.
#[inline(always)]
pub fn fence() {
    use std::sync::atomic::{self, Ordering};

    atomic::fence(Ordering::SeqCst);
}
/// Executes `cpuid` (leaf 0) as an instruction-stream serialization point.
///
/// `rbx` is saved and restored by hand around the instruction because
/// `cpuid` overwrites it; the other registers `cpuid` writes are declared as
/// discarded outputs.
///
/// The block is deliberately *not* marked `nomem`: that option would tell the
/// compiler the assembly touches no memory, leaving it free to move or cache
/// memory accesses across this call and weakening it as an ordering point.
/// `nostack` is not specified either, because the block pushes to the stack.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub fn serialize() {
    // SAFETY: `cpuid` is available on every x86_64 processor; `rbx` is
    // restored before the block ends and every other written register is
    // declared as an output.
    unsafe {
        std::arch::asm!(
            "push rbx",
            "cpuid",
            "pop rbx",
            inout("eax") 0u32 => _,
            // Give the sub-leaf register a defined value instead of leaving
            // whatever happened to be in it.
            inout("ecx") 0u32 => _,
            out("edx") _,
            options(preserves_flags),
        );
    }
}
/// Variant for targets other than x86_64: does nothing (the body is empty).
#[inline(always)]
#[cfg(not(target_arch = "x86_64"))]
pub fn serialize() {}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two counter reads separated by a short burst of arithmetic must not
    /// go backwards.
    #[test]
    fn test_rdtsc() {
        let start = rdtsc();
        let sum = (0..1000).fold(0u64, |acc, i| acc.wrapping_add(i));
        let end = rdtsc();

        assert!(end >= start);
        // Keep the computed value observable so the work is not discarded.
        std::hint::black_box(sum);
    }

    /// The calibrated frequency must fall strictly between 0.5 GHz and
    /// 10 GHz; the value is also printed to stderr.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_calibration() {
        let freq = calibrate_frequency();

        assert!(freq > 500_000_000);
        assert!(freq < 10_000_000_000);
        eprintln!("calibrated frequency: {} ghz", freq as f64 / 1e9);
    }

    /// Converting cycles to nanoseconds and back must land within 1% of the
    /// starting value once the parent module's `init()` has run.
    #[test]
    fn test_conversion() {
        super::super::init();

        let cycles = 1_000_000_000;
        let round_trip = nanos_to_cycles(cycles_to_nanos(cycles));
        let diff = round_trip.abs_diff(cycles);

        assert!(diff < cycles / 100);
    }
}