1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
//! High-speed timing facility. //! //! quanta provides a generalized interface to native high-speed timing mechanisms on different //! target platforms, including support for accessing the TSC (time stamp counter) directly from //! the processor. //! //! # Design //! //! Internally, two clocks are used — a reference and a source — to provide high-speed access to //! timing values, while allowing for conversion back to a reference timescale that matches the //! underlying system. //! //! Calibration between the reference and source happens at initialization time of [`Clock`]. //! //! # Platform support //! //! quanta supports using the native high-speed timing facilities on the following platforms: //! - Windows ([QueryPerformanceCounter]) //! - macOS/OS X ([mach_absolute_time]) //! - Linux/*BSD/Solaris ([clock_gettime]) //! //! # TSC support //! //! Additionally, quanta has `RDTSC`/`RDTSCP` support for querying the time stamp counter directly //! from the processor. Using this mode has some caveats: //! - you need to be running nightly to have access to the `asm!` macro/feature ([#29722]) //! - your processor needs to be recent enough to support a stable TSC mode like invariant or //! constant TSC ([source][tsc_support]) //! //! This feature is only usable when compiled with the `asm` feature flag. //! //! Generally speaking, most modern operating systems will already be attempting to use the TSC on //! your behalf, along with switching to another clocksource if they determine that the TSC is //! unstable or providing unsuitable speed/accuracy. The primary reason to use TSC directly is that //! calling it from userspace is measurably faster — although the "slower" methods are still on //! the order of of tens of nanoseconds — and can be useful for timing operations which themselves //! are _extremely_ fast. //! //! If your operations run in the hundreds of nanoseconds or less range, or you're measuring in a //! tight loop, using the TSC support could help you avoid the normal overhead which would otherwise //! contribute to a large chunk of actual time spent and would otherwise consume valuable cycles. //! //! [QueryPerformanceCounter]: https://msdn.microsoft.com/en-us/library/ms644904(v=VS.85).aspx //! [mach_absolute_time]: https://developer.apple.com/library/archive/qa/qa1398/_index.html //! [clock_gettime]: https://linux.die.net/man/3/clock_gettime //! [#29722]: https://github.com/rust-lang/rust/issues/29722 //! [tsc_support]: http://oliveryang.net/2015/09/pitfalls-of-TSC-usage/ #![cfg_attr(feature = "tsc", feature(asm))] use std::sync::{ atomic::{AtomicU64, Ordering}, Arc, }; use std::time::Duration; mod monotonic; use self::monotonic::Monotonic; mod counter; #[allow(unused_imports)] use self::counter::Counter; mod mock; pub use self::mock::{IntoNanoseconds, Mock}; mod instant; pub use self::instant::Instant; mod upkeep; pub use self::upkeep::{Builder, Handle}; static GLOBAL_RECENT: AtomicU64 = AtomicU64::new(0); type Reference = Monotonic; #[cfg(feature = "tsc")] type Source = Counter; #[cfg(not(feature = "tsc"))] type Source = Monotonic; #[derive(Debug, Clone)] enum ClockType { Optimized(Reference, Source, Calibration), Mock(Arc<Mock>), } /// A clock source that can provide the current time. trait ClockSource { /// Gets the current time. /// /// Ideally, this method should return the time in nanoseconds to match the default clock, but /// it is not a requirement. fn now(&self) -> u64; /// Gets the current time, optimized for measuring the time before the start of a code section. /// /// This allows clock sources to provide a start-specific measurement that has a more accuracy /// than the general performance of [`now`]. This is useful for short sections of code where /// the reordering of CPU instructions could affect the actual start/end measurements. /// /// Ideally, this method should return the time in nanoseconds to match the default clock, but /// it is not a requirement. fn start(&self) -> u64; /// Gets the current time, optimized for measuring the time after the end of a code section. /// /// This allows clock sources to provide a end-specific measurement that has a more accuracy /// than the general performance of [`now`]. This is useful for short sections of code where /// the reordering of CPU instructions could affect the actual start/end measurements. /// /// Ideally, this method should return the time in nanoseconds to match the default clock, but /// it is not a requirement. fn end(&self) -> u64; } #[derive(Debug, Clone)] pub(crate) struct Calibration { identical: bool, ref_time: f64, src_time: f64, ref_hz: f64, src_hz: f64, hz_ratio: f64, } impl Calibration { pub fn new() -> Calibration { Calibration { identical: false, ref_time: 0.0, src_time: 0.0, ref_hz: 1_000_000_000.0, src_hz: 1_000_000_000.0, hz_ratio: 1.0, } } #[allow(dead_code)] pub fn identical() -> Calibration { let mut calibration = Self::new(); calibration.identical = true; calibration } #[allow(dead_code)] pub fn calibrate<R, S>(&mut self, reference: &R, source: &S) where R: ClockSource, S: ClockSource, { self.ref_time = reference.now() as f64; self.src_time = source.start() as f64; let ref_end = self.ref_time + self.ref_hz; loop { let t = reference.now() as f64; if t >= ref_end { break; } } let src_end = source.end() as f64; let ref_d = ref_end - self.ref_time; let src_d = src_end - self.src_time; self.src_hz = (src_d * self.ref_hz) / ref_d; self.hz_ratio = self.ref_hz / self.src_hz; } } impl Default for Calibration { fn default() -> Self { Self::new() } } /// Unified clock for taking measurements. #[derive(Debug, Clone)] pub struct Clock { inner: ClockType, } impl Clock { /// Creates a new clock with the optimal reference and source. /// /// Both the reference clock and source clock are chosen at compile-time to be the fastest /// underlying clocks available. The source clock is calibrated against the reference clock if /// need be. #[cfg(feature = "tsc")] pub fn new() -> Clock { let reference = Reference::new(); let source = Source::new(); let mut calibration = Calibration::new(); calibration.calibrate(&reference, &source); Clock { inner: ClockType::Optimized(reference, source, calibration), } } /// Creates a new clock with the optimal reference and source. /// /// Both the reference clock and source clock are chosen at compile-time to be the fastest /// underlying clocks available. The source clock is calibrated against the reference clock if /// need be. #[cfg(not(feature = "tsc"))] pub fn new() -> Clock { let reference = Reference::new(); let source = Source::new(); let calibration = Calibration::identical(); Clock { inner: ClockType::Optimized(reference, source, calibration), } } /// Creates a new clock that is mocked for controlling the underlying time. /// /// Returns a [`Clock`] instance and a handle to the underlying [`Mock`] source so that the /// caller can control the passage of time. pub fn mock() -> (Clock, Arc<Mock>) { let mock = Arc::new(Mock::new()); let clock = Clock { inner: ClockType::Mock(mock.clone()), }; (clock, mock) } /// Gets the current time, scaled to reference time. /// /// Returns an [`Instant`] pub fn now(&self) -> Instant { match &self.inner { ClockType::Optimized(_, source, _) => self.scaled(source.now()), ClockType::Mock(mock) => Instant(mock.now()), } } /// Gets the underlying time from the source clock. /// /// Value is not guaranteed to be in nanoseconds. /// /// It requires conversion to reference time, however, via [`scaled`] or [`delta`]. /// /// If you need maximum accuracy in your measurements, consider using [`start`] and [`end`]. /// /// [`scaled`]: Clock::scaled /// [`delta`]: Clock::delta /// [`start`]: Clock::start /// [`end`]: Clock::end pub fn raw(&self) -> u64 { match &self.inner { ClockType::Optimized(_, source, _) => source.now(), ClockType::Mock(mock) => mock.now(), } } /// Gets the underlying time from the source clock, specific to starting an operation. /// /// Value is not guaranteed to be in nanoseconds. /// /// Provides the same functionality as [`raw`], but tries to ensure that no extra CPU /// instructions end up executing after the measurement is taken. Since normal processors are /// typically out-of-order, other operations that logically come before a call to this method /// could be reordered to come after the measurement, thereby skewing the overall time /// measured. /// /// [`raw`]: Clock::raw pub fn start(&self) -> u64 { match &self.inner { ClockType::Optimized(_, source, _) => source.start(), ClockType::Mock(mock) => mock.now(), } } /// Gets the underlying time from the source clock, specific to ending an operation. /// /// Value is not guaranteed to be in nanoseconds. /// /// Provides the same functionality as [`raw`], but tries to ensure that no extra CPU /// instructions end up executing before the measurement is taken. Since normal processors are /// typically out-of-order, other operations that logically come after a call to this method /// could be reordered to come before the measurement, thereby skewing the overall time /// measured. /// /// [`raw`]: Clock::raw pub fn end(&self) -> u64 { match &self.inner { ClockType::Optimized(_, source, _) => source.end(), ClockType::Mock(mock) => mock.now(), } } /// Scales a raw measurement to reference time. /// /// You must scale raw measurements to ensure your result is in nanoseconds. The raw /// measurement is not guaranteed to be in nanoseconds and may vary. It is only OK to avoid /// scaling raw measurements if you don't need actual nanoseconds. /// /// Returns an [`Instant`]. pub fn scaled(&self, value: u64) -> Instant { match &self.inner { ClockType::Optimized(_, _, calibration) => { let scaled = if calibration.identical { value } else { (((value as f64 - calibration.src_time) * calibration.hz_ratio) + calibration.ref_time) as u64 }; Instant(scaled) }, ClockType::Mock(_) => Instant(value), } } /// Calculates the delta between two measurements, and scales to reference time. /// /// Value is in nanoseconds. /// /// This method is slightly faster when you know you need the delta between two raw /// measurements, or a start/end measurement, than using [`scaled`] for both conversions. /// /// [`scaled`]: Clock::scaled pub fn delta(&self, start: u64, end: u64) -> Duration { let raw_delta = end.wrapping_sub(start); let scaled = match &self.inner { ClockType::Optimized(_, _, calibration) => (raw_delta as f64 * calibration.hz_ratio) as u64, ClockType::Mock(_) => raw_delta, }; Duration::from_nanos(scaled) } /// Gets the most recent current time, scaled to reference time. /// /// This method provides ultra-low-overhead access to a slightly-delayed version of the current /// time. Instead of querying the underlying source clock directly, an external task -- the /// upkeep thread -- is responsible for polling the time and updating a global reference to the /// "recent" time, which this method then loads. /// /// This method is usually at least 2x faster than querying the clock directly, and could be /// even faster in cases where getting the time goes through a heavy virtualized interface, or /// requires syscalls. /// /// The resolution of the time is still in nanoseconds, although the accuracy can only be as /// high as the interval at which the upkeep thread updates the global recent time. /// /// For example, the upkeep thread could be configured to update the time every millisecond, /// which would provide a measurement that should be, at most, 1ms behind the actual time. /// /// If the upkeep thread has not been started, the return value will be `0`. /// /// Returns an [`Instant`]. pub fn recent(&self) -> Instant { match &self.inner { ClockType::Optimized(_, _, _) => Instant::new(GLOBAL_RECENT.load(Ordering::Relaxed)), ClockType::Mock(mock) => Instant::new(mock.now()), } } /// Updates the recent current time. pub(crate) fn upkeep(value: Instant) { GLOBAL_RECENT.store(value.0, Ordering::Release); } } impl Default for Clock { fn default() -> Clock { Clock::new() } } #[cfg(test)] mod tests { use super::Clock; #[test] fn test_mock() { let (clock, mock) = Clock::mock(); assert_eq!(clock.now().as_u64(), 0); mock.increment(42); assert_eq!(clock.now().as_u64(), 42); } #[test] fn test_now() { let clock = Clock::new(); assert!(clock.now().as_u64() > 0); } #[test] fn test_raw() { let clock = Clock::new(); assert!(clock.raw() > 0); } #[test] fn test_start() { let clock = Clock::new(); assert!(clock.start() > 0); } #[test] fn test_end() { let clock = Clock::new(); assert!(clock.end() > 0); } #[test] fn test_scaled() { let clock = Clock::new(); let raw = clock.raw(); let scaled = clock.scaled(raw); assert!(scaled.as_u64() > 0); } }