1use std::sync::atomic::{AtomicU64, Ordering};
24use std::time::Instant;
25
26#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
28static CACHED_TSC_FREQ: AtomicU64 = AtomicU64::new(0);
29
30#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
32const TSC_UNCALIBRATED: u64 = 0;
33
34pub type Picoseconds = u64;
37
38pub type Nanoseconds = u64;
40
41pub const PS_PER_NS: u64 = 1_000;
43pub const PS_PER_US: u64 = 1_000_000;
44pub const PS_PER_MS: u64 = 1_000_000_000;
45pub const PS_PER_SEC: u64 = 1_000_000_000_000;
46
47#[derive(Clone, Copy, Debug, PartialEq, Eq)]
49pub struct HiResTimestamp {
50 pub picoseconds: Picoseconds,
52 pub uncertainty_low: Picoseconds,
54 pub uncertainty_high: Picoseconds,
56 pub is_estimated: bool,
58}
59
60impl HiResTimestamp {
61 pub fn from_nanos(ns: u64) -> Self {
63 HiResTimestamp {
64 picoseconds: ns.saturating_mul(PS_PER_NS),
65 uncertainty_low: 500, uncertainty_high: 500,
67 is_estimated: false,
68 }
69 }
70
71 pub fn from_picos(ps: Picoseconds, uncertainty: Picoseconds) -> Self {
73 HiResTimestamp {
74 picoseconds: ps,
75 uncertainty_low: uncertainty,
76 uncertainty_high: uncertainty,
77 is_estimated: true,
78 }
79 }
80
81 pub fn as_nanos(&self) -> u64 {
83 self.picoseconds / PS_PER_NS
84 }
85
86 pub fn as_micros(&self) -> u64 {
88 self.picoseconds / PS_PER_US
89 }
90
91 pub fn as_millis(&self) -> u64 {
93 self.picoseconds / PS_PER_MS
94 }
95
96 pub fn as_secs_f64(&self) -> f64 {
98 self.picoseconds as f64 / PS_PER_SEC as f64
99 }
100
101 pub fn format(&self) -> String {
103 if self.picoseconds < PS_PER_NS {
104 format!("{}ps", self.picoseconds)
105 } else if self.picoseconds < PS_PER_US {
106 format!("{:.3}ns", self.picoseconds as f64 / PS_PER_NS as f64)
107 } else if self.picoseconds < PS_PER_MS {
108 format!("{:.3}µs", self.picoseconds as f64 / PS_PER_US as f64)
109 } else if self.picoseconds < PS_PER_SEC {
110 format!("{:.3}ms", self.picoseconds as f64 / PS_PER_MS as f64)
111 } else {
112 format!("{:.3}s", self.picoseconds as f64 / PS_PER_SEC as f64)
113 }
114 }
115
116 pub fn format_with_uncertainty(&self) -> String {
118 let base = self.format();
119 let unc = if self.uncertainty_high == self.uncertainty_low {
120 format!("±{}ps", self.uncertainty_low)
121 } else {
122 format!("+{}/-{}ps", self.uncertainty_high, self.uncertainty_low)
123 };
124 format!("{} ({})", base, unc)
125 }
126}
127
128impl std::ops::Sub for HiResTimestamp {
129 type Output = HiResTimestamp;
130
131 fn sub(self, rhs: Self) -> Self::Output {
132 HiResTimestamp {
133 picoseconds: self.picoseconds.saturating_sub(rhs.picoseconds),
134 uncertainty_low: self.uncertainty_low + rhs.uncertainty_low,
135 uncertainty_high: self.uncertainty_high + rhs.uncertainty_high,
136 is_estimated: self.is_estimated || rhs.is_estimated,
137 }
138 }
139}
140
141pub struct HiResTimer {
143 start_instant: Instant,
145 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
147 start_tsc: u64,
148 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
150 tsc_freq_hz: u64,
151}
152
153impl HiResTimer {
154 #[inline]
156 pub fn start() -> Self {
157 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
158 {
159 let start_tsc = rdtsc();
160 let start_instant = Instant::now();
161 let tsc_freq_hz = get_tsc_frequency();
162
163 HiResTimer {
164 start_instant,
165 start_tsc,
166 tsc_freq_hz,
167 }
168 }
169
170 #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
171 {
172 HiResTimer {
173 start_instant: Instant::now(),
174 }
175 }
176 }
177
178 #[inline]
180 pub fn elapsed(&self) -> HiResTimestamp {
181 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
182 {
183 if self.tsc_freq_hz > 0 {
184 let end_tsc = rdtsc();
185 let cycles = end_tsc.saturating_sub(self.start_tsc);
186
187 let ps = ((cycles as u128) * PS_PER_SEC as u128) / (self.tsc_freq_hz as u128);
190 let ps = ps.min(u64::MAX as u128) as u64;
191
192 let uncertainty = PS_PER_SEC / self.tsc_freq_hz;
194
195 return HiResTimestamp::from_picos(ps, uncertainty);
196 }
197 }
198
199 let elapsed = self.start_instant.elapsed();
201 HiResTimestamp::from_nanos(elapsed.as_nanos().min(u64::MAX as u128) as u64)
202 }
203
204 #[inline]
206 pub fn elapsed_nanos(&self) -> u64 {
207 self.elapsed().as_nanos()
208 }
209
210 #[inline]
212 pub fn elapsed_picos(&self) -> Picoseconds {
213 self.elapsed().picoseconds
214 }
215}
216
217#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
219#[inline]
220fn rdtsc() -> u64 {
221 #[cfg(target_arch = "x86_64")]
222 unsafe {
223 std::arch::x86_64::_rdtsc()
224 }
225
226 #[cfg(target_arch = "x86")]
227 unsafe {
228 std::arch::x86::_rdtsc()
229 }
230}
231
232#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
238#[inline]
239fn get_tsc_frequency() -> u64 {
240 let cached = CACHED_TSC_FREQ.load(Ordering::Relaxed);
242 if cached != TSC_UNCALIBRATED {
243 return cached;
244 }
245
246 let freq = calibrate_tsc_frequency();
248 CACHED_TSC_FREQ.store(freq, Ordering::Relaxed);
249 freq
250}
251
252#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
254fn calibrate_tsc_frequency() -> u64 {
255 if let Ok(content) = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/tsc_freq_khz") {
257 if let Ok(khz) = content.trim().parse::<u64>() {
258 return khz * 1000;
259 }
260 }
261
262 if let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") {
264 for line in content.lines() {
265 if line.starts_with("cpu MHz") {
266 if let Some(mhz_str) = line.split(':').nth(1) {
267 if let Ok(mhz) = mhz_str.trim().parse::<f64>() {
268 return (mhz * 1_000_000.0) as u64;
269 }
270 }
271 }
272 }
273 }
274
275 let calibration_ns = 1_000_000; let start_tsc = rdtsc();
279 let start = Instant::now();
280
281 while start.elapsed().as_nanos() < calibration_ns as u128 {
283 std::hint::spin_loop();
284 }
285
286 let end_tsc = rdtsc();
287 let actual_ns = start.elapsed().as_nanos() as u64;
288 let cycles = end_tsc.saturating_sub(start_tsc);
289
290 (cycles as u128 * 1_000_000_000 / actual_ns as u128) as u64
292}
293
294pub struct HiResMetrics {
299 pub count: AtomicU64,
301 pub total_ps: AtomicU64,
303 pub min_ps: AtomicU64,
305 pub max_ps: AtomicU64,
307 pub sum_sq_ns2: AtomicU64,
310}
311
312impl HiResMetrics {
313 pub const fn new() -> Self {
314 HiResMetrics {
315 count: AtomicU64::new(0),
316 total_ps: AtomicU64::new(0),
317 min_ps: AtomicU64::new(u64::MAX),
318 max_ps: AtomicU64::new(0),
319 sum_sq_ns2: AtomicU64::new(0),
320 }
321 }
322
323 pub fn record(&self, timestamp: HiResTimestamp) {
325 let ps = timestamp.picoseconds;
326 let ns = timestamp.as_nanos();
327
328 self.count.fetch_add(1, Ordering::Relaxed);
329 self.total_ps.fetch_add(ps, Ordering::Relaxed);
330
331 let mut cur_min = self.min_ps.load(Ordering::Relaxed);
333 while ps < cur_min {
334 match self.min_ps.compare_exchange_weak(
335 cur_min,
336 ps,
337 Ordering::Relaxed,
338 Ordering::Relaxed,
339 ) {
340 Ok(_) => break,
341 Err(x) => cur_min = x,
342 }
343 }
344
345 let mut cur_max = self.max_ps.load(Ordering::Relaxed);
347 while ps > cur_max {
348 match self.max_ps.compare_exchange_weak(
349 cur_max,
350 ps,
351 Ordering::Relaxed,
352 Ordering::Relaxed,
353 ) {
354 Ok(_) => break,
355 Err(x) => cur_max = x,
356 }
357 }
358
359 self.sum_sq_ns2
361 .fetch_add(ns.saturating_mul(ns), Ordering::Relaxed);
362 }
363
364 pub fn record_timer(&self, timer: &HiResTimer) {
366 self.record(timer.elapsed());
367 }
368
369 pub fn snapshot(&self) -> HiResMetricsSnapshot {
371 let count = self.count.load(Ordering::Relaxed);
372 let total_ps = self.total_ps.load(Ordering::Relaxed);
373 let min_ps = self.min_ps.load(Ordering::Relaxed);
374 let max_ps = self.max_ps.load(Ordering::Relaxed);
375 let sum_sq_ns2 = self.sum_sq_ns2.load(Ordering::Relaxed);
376
377 let mean_ps = if count > 0 { total_ps / count } else { 0 };
378
379 let mean_ns = mean_ps / PS_PER_NS;
381 let variance_ns2 = if count > 1 {
382 let e_x2 = sum_sq_ns2 / count;
383 e_x2.saturating_sub(mean_ns.saturating_mul(mean_ns))
384 } else {
385 0
386 };
387
388 let variance_ps = variance_ns2.saturating_mul(PS_PER_NS * PS_PER_NS);
390 let stddev_ps = (variance_ps as f64).sqrt() as u64;
391
392 HiResMetricsSnapshot {
393 count,
394 total_ps,
395 min_ps: if min_ps == u64::MAX { 0 } else { min_ps },
396 max_ps,
397 mean_ps,
398 stddev_ps,
399 }
400 }
401
402 pub fn reset(&self) {
404 self.count.store(0, Ordering::Relaxed);
405 self.total_ps.store(0, Ordering::Relaxed);
406 self.min_ps.store(u64::MAX, Ordering::Relaxed);
407 self.max_ps.store(0, Ordering::Relaxed);
408 self.sum_sq_ns2.store(0, Ordering::Relaxed);
409 }
410}
411
412impl Default for HiResMetrics {
413 fn default() -> Self {
414 Self::new()
415 }
416}
417
418#[derive(Clone, Copy, Debug, Default)]
420pub struct HiResMetricsSnapshot {
421 pub count: u64,
423 pub total_ps: Picoseconds,
425 pub min_ps: Picoseconds,
427 pub max_ps: Picoseconds,
429 pub mean_ps: Picoseconds,
431 pub stddev_ps: Picoseconds,
433}
434
435impl HiResMetricsSnapshot {
436 pub fn format(&self) -> String {
438 if self.count == 0 {
439 return "no samples".to_string();
440 }
441
442 format!(
443 "n={} mean={} min={} max={} stddev={}",
444 self.count,
445 HiResTimestamp::from_picos(self.mean_ps, 0).format(),
446 HiResTimestamp::from_picos(self.min_ps, 0).format(),
447 HiResTimestamp::from_picos(self.max_ps, 0).format(),
448 HiResTimestamp::from_picos(self.stddev_ps, 0).format(),
449 )
450 }
451
452 pub fn ops_per_sec(&self) -> f64 {
454 if self.count == 0 || self.total_ps == 0 {
455 return 0.0;
456 }
457 self.count as f64 / (self.total_ps as f64 / PS_PER_SEC as f64)
458 }
459
460 pub fn ops_per_us(&self) -> f64 {
462 if self.count == 0 || self.total_ps == 0 {
463 return 0.0;
464 }
465 self.count as f64 / (self.total_ps as f64 / PS_PER_US as f64)
466 }
467}
468
469#[inline]
471pub fn measure<F, R>(f: F) -> (R, HiResTimestamp)
472where
473 F: FnOnce() -> R,
474{
475 let timer = HiResTimer::start();
476 let result = f();
477 (result, timer.elapsed())
478}
479
480pub fn measure_n<F, R>(n: usize, mut f: F) -> (Vec<R>, HiResMetricsSnapshot)
482where
483 F: FnMut() -> R,
484{
485 let metrics = HiResMetrics::new();
486 let mut results = Vec::with_capacity(n);
487
488 for _ in 0..n {
489 let timer = HiResTimer::start();
490 results.push(f());
491 metrics.record_timer(&timer);
492 }
493
494 (results, metrics.snapshot())
495}
496
497#[cfg(test)]
498mod tests {
499 use super::*;
500
501 #[test]
502 fn test_timestamp_from_nanos() {
503 let ts = HiResTimestamp::from_nanos(1_000);
504 assert_eq!(ts.picoseconds, 1_000_000);
505 assert_eq!(ts.as_nanos(), 1_000);
506 assert!(!ts.is_estimated);
507 }
508
509 #[test]
510 fn test_timestamp_formatting() {
511 assert_eq!(HiResTimestamp::from_picos(500, 0).format(), "500ps");
512 assert_eq!(HiResTimestamp::from_nanos(1).format(), "1.000ns");
513 assert_eq!(HiResTimestamp::from_nanos(1_000).format(), "1.000µs");
514 assert_eq!(HiResTimestamp::from_nanos(1_000_000).format(), "1.000ms");
515 assert_eq!(HiResTimestamp::from_nanos(1_000_000_000).format(), "1.000s");
516 }
517
518 #[test]
519 fn test_timestamp_subtraction() {
520 let a = HiResTimestamp::from_nanos(1_000);
521 let b = HiResTimestamp::from_nanos(500);
522 let diff = a - b;
523 assert_eq!(diff.as_nanos(), 500);
524 }
525
526 #[test]
527 fn test_hires_timer_basic() {
528 let timer = HiResTimer::start();
529 std::thread::sleep(std::time::Duration::from_micros(100));
530 let elapsed = timer.elapsed();
531
532 assert!(
534 elapsed.picoseconds >= 100_000_000,
535 "elapsed: {} ps",
536 elapsed.picoseconds
537 );
538 assert!(
540 elapsed.picoseconds < 10_000_000_000_000,
541 "elapsed: {} ps",
542 elapsed.picoseconds
543 );
544 }
545
546 #[test]
547 fn test_hires_metrics_accumulation() {
548 let metrics = HiResMetrics::new();
549
550 metrics.record(HiResTimestamp::from_nanos(100));
552 metrics.record(HiResTimestamp::from_nanos(200));
553 metrics.record(HiResTimestamp::from_nanos(300));
554
555 let snapshot = metrics.snapshot();
556 assert_eq!(snapshot.count, 3);
557 assert_eq!(snapshot.min_ps, 100 * PS_PER_NS);
558 assert_eq!(snapshot.max_ps, 300 * PS_PER_NS);
559 assert_eq!(snapshot.mean_ps, 200 * PS_PER_NS);
560 }
561
562 #[test]
563 fn test_measure_closure() {
564 let (result, timing) = measure(|| {
565 let mut sum = 0u64;
566 for i in 0..1000 {
567 sum = sum.wrapping_add(i);
568 }
569 sum
570 });
571
572 assert!(result > 0);
573 assert!(timing.picoseconds > 0);
574 }
575
576 #[test]
577 fn test_measure_n() {
578 let iterations = 100;
579 let (results, stats) = measure_n(iterations, || {
580 std::hint::black_box(42)
582 });
583
584 assert_eq!(results.len(), iterations);
585 assert_eq!(stats.count, iterations as u64);
586 assert!(stats.min_ps > 0);
587 assert!(stats.max_ps >= stats.min_ps);
588 }
589
590 #[test]
591 fn test_picosecond_precision_smoke() {
592 let timer = HiResTimer::start();
594
595 let _ = std::hint::black_box(1 + 1);
597
598 let elapsed = timer.elapsed();
599
600 println!(
603 "Short operation: {} ({})",
604 elapsed.format(),
605 elapsed.format_with_uncertainty()
606 );
607
608 assert!(elapsed.picoseconds > 0 || !elapsed.is_estimated);
610 }
611}