divan/time/timestamp/tsc/
x86.rs

1#[cfg(target_arch = "x86")]
2use std::arch::x86;
3
4#[cfg(target_arch = "x86_64")]
5use std::arch::x86_64 as x86;
6
7use std::time::{Duration, Instant};
8
9use crate::time::{fence, TscUnavailable};
10
11#[inline(always)]
12pub(crate) fn start_timestamp() -> u64 {
13    // Serialize previous operations before `rdtsc` to ensure they are not
14    // inside the timed section.
15    util::lfence();
16
17    let tsc = util::rdtsc();
18
19    // Serialize `rdtsc` before any measured code.
20    util::lfence();
21
22    tsc
23}
24
25#[inline(always)]
26pub(crate) fn end_timestamp() -> u64 {
27    // `rdtscp` is serialized after all previous operations.
28    let tsc = util::rdtscp();
29
30    // Serialize `rdtscp` before any subsequent code.
31    util::lfence();
32
33    tsc
34}
35
36pub(crate) fn frequency() -> Result<u64, TscUnavailable> {
37    if !util::tsc_is_available() {
38        return Err(TscUnavailable::MissingInstructions);
39    }
40
41    if !util::tsc_is_invariant() {
42        return Err(TscUnavailable::VariableFrequency);
43    }
44
45    let nominal = nominal_frequency();
46    let measured = measure::measure_frequency();
47
48    // Use the nominal frequency if within 0.1% of the measured frequency.
49    //
50    // The nominal frequency is used for getting an exact value if the measured
51    // frequency is slightly off. It is not blindly trusted because it may not
52    // match the TSC frequency.
53    if let Some(nominal) = nominal {
54        if measured * 0.999 < nominal && nominal < measured * 1.001 {
55            return Ok(nominal.round() as u64);
56        }
57    }
58
59    Ok(measured.round() as u64)
60}
61
62/// Parses the CPU frequency in the brand name, e.g. "2.50GHz".
63fn nominal_frequency() -> Option<f64> {
64    let name = util::cpu_name()?;
65    let name = {
66        let len = name.iter().position(|&ch| ch == 0).unwrap_or(name.len());
67        std::str::from_utf8(&name[..len]).ok()?
68    };
69
70    #[rustfmt::skip]
71    let frequencies = [
72        ("MHz", 1e6),
73        ("GHz", 1e9),
74        ("THz", 1e12),
75    ];
76
77    for (unit, scale) in frequencies {
78        let Some(unit_start) = name.find(unit) else {
79            continue;
80        };
81
82        let pre_unit = &name[..unit_start];
83        let num = match pre_unit.rsplit_once(' ') {
84            Some((_, num)) => num,
85            None => pre_unit,
86        };
87
88        if let Ok(num) = num.parse::<f64>() {
89            return Some(num * scale);
90        };
91    }
92
93    None
94}
95
96mod util {
97    use super::*;
98
99    #[inline(always)]
100    pub fn rdtsc() -> u64 {
101        fence::compiler_fence();
102
103        // SAFETY: Reading the TSC is memory safe.
104        let tsc = unsafe { x86::_rdtsc() };
105
106        fence::compiler_fence();
107        tsc
108    }
109
110    #[inline(always)]
111    pub fn rdtscp() -> u64 {
112        fence::compiler_fence();
113
114        // SAFETY: Reading the TSC is memory safe.
115        let tsc = unsafe { x86::__rdtscp(&mut 0) };
116
117        fence::compiler_fence();
118        tsc
119    }
120
121    #[inline(always)]
122    pub fn lfence() {
123        // SAFETY: A load fence is memory safe.
124        unsafe { x86::_mm_lfence() }
125    }
126
127    #[inline]
128    fn cpuid(leaf: u32) -> x86::CpuidResult {
129        // SAFETY: `cpuid` is never unsafe to call.
130        unsafe { x86::__cpuid(leaf) }
131    }
132
133    /// Invokes CPUID and converts its output registers to an ordered array.
134    #[inline]
135    fn cpuid_array(leaf: u32) -> [u32; 4] {
136        let cpuid = cpuid(leaf);
137        [cpuid.eax, cpuid.ebx, cpuid.ecx, cpuid.edx]
138    }
139
140    /// Returns `true` if the given CPUID leaf is available.
141    #[inline]
142    fn cpuid_has_leaf(leaf: u32) -> bool {
143        cpuid(0x8000_0000).eax >= leaf
144    }
145
146    /// Returns `true` if CPUID indicates that the `rdtsc` and `rdtscp`
147    /// instructions are available.
148    #[inline]
149    pub fn tsc_is_available() -> bool {
150        let bits = cpuid(0x8000_0001).edx;
151
152        let rdtsc = 1 << 4;
153        let rdtscp = 1 << 27;
154
155        bits & (rdtsc | rdtscp) != 0
156    }
157
158    /// Returns `true` if CPUID indicates that the timestamp counter has a
159    /// constant frequency.
160    #[inline]
161    pub fn tsc_is_invariant() -> bool {
162        let leaf = 0x8000_0007;
163
164        if !cpuid_has_leaf(leaf) {
165            return false;
166        }
167
168        cpuid(leaf).edx & (1 << 8) != 0
169    }
170
171    /// Returns the processor model name as a null-terminated ASCII string.
172    pub fn cpu_name() -> Option<[u8; 48]> {
173        if !cpuid_has_leaf(0x8000_0004) {
174            return None;
175        }
176
177        #[rustfmt::skip]
178        let result = [
179            cpuid_array(0x8000_0002),
180            cpuid_array(0x8000_0003),
181            cpuid_array(0x8000_0004),
182        ];
183
184        // SAFETY: Converting from `u32` to bytes.
185        Some(unsafe { std::mem::transmute(result) })
186    }
187}
188
189mod measure {
190    use super::*;
191
192    /// Returns the TSC frequency by measuring it.
193    pub fn measure_frequency() -> f64 {
194        const TRIES: usize = 8;
195
196        // Start with delay of 1ms up to 256ms (2^TRIES).
197        let mut delay_ms = 1;
198
199        let mut prev_measure = f64::NEG_INFINITY;
200        let mut measures = [0.0; TRIES];
201
202        for slot in &mut measures {
203            let measure = measure_frequency_once(Duration::from_millis(delay_ms));
204
205            // This measurement is sufficiently accurate if within 0.1% of the
206            // previous.
207            if measure * 0.999 < prev_measure && prev_measure < measure * 1.001 {
208                return measure;
209            }
210
211            *slot = measure;
212            prev_measure = measure;
213
214            delay_ms *= 2;
215        }
216
217        // If no frequencies were within 0.1% of each other, find the frequency
218        // with the smallest delta.
219        let mut min_delta = f64::INFINITY;
220        let mut result_index = 0;
221
222        for i in 0..TRIES {
223            for j in (i + 1)..TRIES {
224                let delta = (measures[i] - measures[j]).abs();
225
226                if delta < min_delta {
227                    min_delta = delta;
228                    result_index = i;
229                }
230            }
231        }
232
233        measures[result_index]
234    }
235
236    fn measure_frequency_once(delay: Duration) -> f64 {
237        let (start_tsc, start_instant) = tsc_instant_pair();
238        std::thread::sleep(delay);
239        let (end_tsc, end_instant) = tsc_instant_pair();
240
241        let elapsed_tsc = end_tsc.saturating_sub(start_tsc);
242        let elapsed_duration = end_instant.duration_since(start_instant);
243
244        (elapsed_tsc as f64 / elapsed_duration.as_nanos() as f64) * 1e9
245    }
246
247    /// Returns a timestamp/instant pair that has a small latency between
248    /// getting the two values.
249    fn tsc_instant_pair() -> (u64, Instant) {
250        let mut best_latency = Duration::MAX;
251        let mut best_pair = (0, Instant::now());
252
253        // Make up to 100 attempts to get a low latency pair.
254        for _ in 0..100 {
255            let instant = Instant::now();
256            let tsc = util::rdtsc();
257            let latency = instant.elapsed();
258
259            let pair = (tsc, instant);
260
261            if latency.is_zero() {
262                return pair;
263            }
264
265            if latency < best_latency {
266                best_latency = latency;
267                best_pair = pair;
268            }
269        }
270
271        best_pair
272    }
273}