#[cfg(target_arch = "x86")]
use std::arch::x86;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64 as x86;
use std::time::{Duration, Instant};
use crate::time::{fence, TscUnavailable};
#[inline(always)]
pub(crate) fn start_timestamp() -> u64 {
util::lfence();
let tsc = util::rdtsc();
util::lfence();
tsc
}
#[inline(always)]
pub(crate) fn end_timestamp() -> u64 {
let tsc = util::rdtscp();
util::lfence();
tsc
}
pub(crate) fn frequency() -> Result<u64, TscUnavailable> {
if !util::tsc_is_available() {
return Err(TscUnavailable::MissingInstructions);
}
if !util::tsc_is_invariant() {
return Err(TscUnavailable::VariableFrequency);
}
let nominal = nominal_frequency();
let measured = measure::measure_frequency();
if let Some(nominal) = nominal {
if measured * 0.999 < nominal && nominal < measured * 1.001 {
return Ok(nominal.round() as u64);
}
}
Ok(measured.round() as u64)
}
fn nominal_frequency() -> Option<f64> {
let name = util::cpu_name()?;
let name = {
let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len());
std::str::from_utf8(&name[..len]).ok()?
};
#[rustfmt::skip]
let frequencies = [
("MHz", 1e6),
("GHz", 1e9),
("THz", 1e12),
];
for (unit, scale) in frequencies {
let Some(unit_start) = name.find(unit) else {
continue;
};
let pre_unit = &name[..unit_start];
let num = match pre_unit.rsplit_once(' ') {
Some((_, num)) => num,
None => pre_unit,
};
if let Ok(num) = num.parse::<f64>() {
return Some(num * scale);
};
}
None
}
mod util {
use super::*;
#[inline(always)]
pub fn rdtsc() -> u64 {
fence::compiler_fence();
let tsc = unsafe { x86::_rdtsc() };
fence::compiler_fence();
tsc
}
#[inline(always)]
pub fn rdtscp() -> u64 {
fence::compiler_fence();
let tsc = unsafe { x86::__rdtscp(&mut 0) };
fence::compiler_fence();
tsc
}
#[inline(always)]
pub fn lfence() {
unsafe { x86::_mm_lfence() }
}
#[inline]
fn cpuid(leaf: u32) -> x86::CpuidResult {
unsafe { x86::__cpuid(leaf) }
}
#[inline]
fn cpuid_array(leaf: u32) -> [u32; 4] {
let cpuid = cpuid(leaf);
[cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx]
}
#[inline]
fn cpuid_has_leaf(leaf: u32) -> bool {
cpuid(0x8000_0000).eax >= leaf
}
#[inline]
pub fn tsc_is_available() -> bool {
let bits = cpuid(0x8000_0001).edx;
let rdtsc = 1 << 4;
let rdtscp = 1 << 27;
bits & (rdtsc | rdtscp) != 0
}
#[inline]
pub fn tsc_is_invariant() -> bool {
let leaf = 0x8000_0007;
if !cpuid_has_leaf(leaf) {
return false;
}
cpuid(leaf).edx & (1 << 8) != 0
}
pub fn cpu_name() -> Option<[u8; 48]> {
if !cpuid_has_leaf(0x8000_0004) {
return None;
}
#[rustfmt::skip]
let result = [
cpuid_array(0x8000_0002),
cpuid_array(0x8000_0003),
cpuid_array(0x8000_0004),
];
Some(unsafe { std::mem::transmute(result) })
}
}
mod measure {
use super::*;
pub fn measure_frequency() -> f64 {
const TRIES: usize = 8;
let mut delay_ms = 1;
let mut prev_measure = f64::NEG_INFINITY;
let mut measures = [0.0; TRIES];
for slot in &mut measures {
let measure = measure_frequency_once(Duration::from_millis(delay_ms));
if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 {
return measure;
}
*slot = measure;
prev_measure = measure;
delay_ms *= 2;
}
let mut min_delta = f64::INFINITY;
let mut result_index = 0;
for i in 0..TRIES {
for j in (i + 1)..TRIES {
let delta = (measures[i] - measures[j]).abs();
if delta < min_delta {
min_delta = delta;
result_index = i;
}
}
}
measures[result_index]
}
fn measure_frequency_once(delay: Duration) -> f64 {
let (start_tsc, start_instant) = tsc_instant_pair();
std::thread::sleep(delay);
let (end_tsc, end_instant) = tsc_instant_pair();
let elapsed_tsc = end_tsc.saturating_sub(start_tsc);
let elapsed_duration = end_instant.duration_since(start_instant);
(elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9
}
fn tsc_instant_pair() -> (u64, Instant) {
let mut best_latency = Duration::MAX;
let mut best_pair = (0, Instant::now());
for _ in 0..100 {
let instant = Instant::now();
let tsc = util::rdtsc();
let latency = instant.elapsed();
let pair = (tsc, instant);
if latency.is_zero() {
return pair;
}
if latency < best_latency {
best_latency = latency;
best_pair = pair;
}
}
best_pair
}
}