1use crate::bench_utils::latency;
4
5use super::{DiffOut, Timing};
6use std::{
7 cmp,
8 io::{Write, stderr},
9 sync::atomic::{AtomicU64, Ordering},
10 time::{Duration, Instant},
11};
12
/// Warm-up duration target in milliseconds, read by the warm-up phase of the benchmark.
///
/// Initialized to 3,000 ms; change it at runtime with [`set_warmup_millis`].
static WARMUP_MILLIS: AtomicU64 = AtomicU64::new(3_000);

/// Returns the currently configured warm-up duration, in milliseconds.
pub fn get_warmup_millis() -> u64 {
    // A relaxed load is used: only this one value is read.
    WARMUP_MILLIS.load(Ordering::Relaxed)
}

/// Replaces the configured warm-up duration with `millis` milliseconds.
///
/// Subsequent calls to [`get_warmup_millis`] observe the new value.
pub fn set_warmup_millis(millis: u64) {
    // A relaxed store is used: only this one value is written.
    WARMUP_MILLIS.store(millis, Ordering::Relaxed);
}

/// Number of executions requested for each warm-up increment.
const WARMUP_INCREMENT_COUNT: usize = 20;
28
/// Unit in which latencies are expressed when converted to or from plain numbers.
#[derive(Clone, Copy, Debug)]
pub enum LatencyUnit {
    /// Milliseconds.
    Milli,
    /// Microseconds.
    Micro,
    /// Nanoseconds.
    Nano,
}

impl LatencyUnit {
    /// Converts `latency` to a whole number of this unit.
    ///
    /// The fractional part of a unit is dropped, and the `u128` count returned by
    /// [`Duration`] is narrowed to `u64` with an `as` cast.
    #[inline(always)]
    pub fn latency_as_u64(&self, latency: Duration) -> u64 {
        let whole_units: u128 = match *self {
            Self::Milli => latency.as_millis(),
            Self::Micro => latency.as_micros(),
            Self::Nano => latency.as_nanos(),
        };
        whole_units as u64
    }

    /// Builds a [`Duration`] from `elapsed`, interpreted as a count of this unit.
    #[inline(always)]
    pub fn latency_from_u64(&self, elapsed: u64) -> Duration {
        match *self {
            Self::Milli => Duration::from_millis(elapsed),
            Self::Micro => Duration::from_micros(elapsed),
            Self::Nano => Duration::from_nanos(elapsed),
        }
    }

    /// Converts `latency` to this unit as an `f64`.
    ///
    /// The value goes through [`Self::latency_as_u64`] first, so it is always a whole number.
    #[inline(always)]
    pub fn latency_as_f64(&self, latency: Duration) -> f64 {
        let whole_units = self.latency_as_u64(latency);
        whole_units as f64
    }

    /// Builds a [`Duration`] from `elapsed`, interpreted as a count of this unit.
    ///
    /// `elapsed` is cast to `u64` with `as` before conversion, so the fractional part is
    /// dropped and negative or NaN inputs become zero.
    #[inline(always)]
    pub fn latency_from_f64(&self, elapsed: f64) -> Duration {
        let whole_units = elapsed as u64;
        self.latency_from_u64(whole_units)
    }
}
70
71#[inline(always)]
74fn duo_exec(mut f1: impl FnMut(), mut f2: impl FnMut()) -> [(Duration, Duration); 2] {
75 let l01 = latency(&mut f1);
76 let l02 = latency(&mut f2);
77
78 let l12 = latency(&mut f2);
79 let l11 = latency(&mut f1);
80
81 [(l01, l02), (l11, l12)]
82}
83
84pub(crate) struct DiffState<'a> {
85 hist_f1: &'a mut Timing,
86 hist_f2: &'a mut Timing,
87 hist_f1_lt_f2: &'a mut Timing,
88 count_f1_eq_f2: &'a mut u64,
89 hist_f1_gt_f2: &'a mut Timing,
90 sum_f1: &'a mut i64,
91 sum_f2: &'a mut i64,
92 sum_ln_f1: &'a mut f64,
93 sum2_ln_f1: &'a mut f64,
94 sum_ln_f2: &'a mut f64,
95 sum2_ln_f2: &'a mut f64,
96 sum2_diff_f1_f2: &'a mut i64,
97 sum2_diff_ln_f1_f2: &'a mut f64,
98}
99
100impl<'a> DiffState<'a> {
101 pub fn new(out: &'a mut DiffOut) -> Self {
102 Self {
103 hist_f1: &mut out.hist_f1,
104 hist_f2: &mut out.hist_f2,
105 hist_f1_lt_f2: &mut out.hist_f1_lt_f2,
106 count_f1_eq_f2: &mut out.count_f1_eq_f2,
107 hist_f1_gt_f2: &mut out.hist_f1_gt_f2,
108 sum_f1: &mut out.sum_f1,
109 sum_f2: &mut out.sum_f2,
110 sum_ln_f1: &mut out.sum_ln_f1,
111 sum2_ln_f1: &mut out.sum2_ln_f1,
112 sum_ln_f2: &mut out.sum_ln_f2,
113 sum2_ln_f2: &mut out.sum2_ln_f2,
114 sum2_diff_f1_f2: &mut out.sum2_diff_f1_f2,
115 sum2_diff_ln_f1_f2: &mut out.sum2_diff_ln_f1_f2,
116 }
117 }
118
119 pub fn reversed(&'a mut self) -> Self {
120 Self {
121 hist_f1: self.hist_f2,
122 hist_f2: self.hist_f1,
123 hist_f1_lt_f2: self.hist_f1_gt_f2,
124 count_f1_eq_f2: self.count_f1_eq_f2,
125 hist_f1_gt_f2: self.hist_f1_lt_f2,
126 sum_f1: self.sum_f2,
127 sum_f2: self.sum_f1,
128 sum_ln_f1: self.sum_ln_f2,
129 sum2_ln_f1: self.sum2_ln_f2,
130 sum_ln_f2: self.sum_ln_f1,
131 sum2_ln_f2: self.sum2_ln_f1,
132 sum2_diff_f1_f2: self.sum2_diff_f1_f2,
133 sum2_diff_ln_f1_f2: self.sum2_diff_ln_f1_f2,
134 }
135 }
136
137 pub(crate) fn reset(&mut self) {
138 self.hist_f1.reset();
139 self.hist_f2.reset();
140 self.hist_f1_lt_f2.reset();
141 *self.count_f1_eq_f2 = 0;
142 self.hist_f1_gt_f2.reset();
143 *self.sum_f1 = 0;
144 *self.sum_f2 = 0;
145 *self.sum_ln_f1 = 0.;
146 *self.sum2_ln_f1 = 0.;
147 *self.sum_ln_f2 = 0.;
148 *self.sum2_ln_f2 = 0.;
149 *self.sum2_diff_f1_f2 = 0;
150 *self.sum2_diff_ln_f1_f2 = 0.;
151 }
152
153 #[inline(always)]
155 pub(crate) fn capture_data(&mut self, elapsed1: u64, elapsed2: u64) {
156 self.hist_f1
157 .record(elapsed1)
158 .expect("can't happen: histogram is auto-resizable");
159 self.hist_f2
160 .record(elapsed2)
161 .expect("can't happen: histogram is auto-resizable");
162
163 let diff = elapsed1 as i64 - elapsed2 as i64;
164
165 match diff.cmp(&0) {
166 cmp::Ordering::Less => self
167 .hist_f1_lt_f2
168 .record(diff as u64)
169 .expect("can't happen: histogram is auto-resizable"),
170 cmp::Ordering::Greater => self
171 .hist_f1_gt_f2
172 .record(-diff as u64)
173 .expect("can't happen: histogram is auto-resizable"),
174 cmp::Ordering::Equal => *self.count_f1_eq_f2 += 1,
175 }
176
177 assert!(elapsed1 > 0, "f1 latency must be > 0");
178 *self.sum_f1 += elapsed1 as i64;
179 let ln_f1 = (elapsed1 as f64).ln();
180 *self.sum_ln_f1 += ln_f1;
181 *self.sum2_ln_f1 += ln_f1.powi(2);
182
183 assert!(elapsed2 > 0, "f2 latency must be > 0");
184 *self.sum_f2 += elapsed2 as i64;
185 let ln_f2 = (elapsed2 as f64).ln();
186 *self.sum_ln_f2 += ln_f2;
187 *self.sum2_ln_f2 += ln_f2.powi(2);
188
189 let diff_f1_f2 = elapsed1 as i64 - elapsed2 as i64;
190 *self.sum2_diff_f1_f2 += diff_f1_f2.pow(2);
191
192 let diff_ln_f1_f2 = ln_f1 - ln_f2;
193 *self.sum2_diff_ln_f1_f2 += diff_ln_f1_f2.powi(2);
194 }
195
196 fn execute(
200 &mut self,
201 unit: LatencyUnit,
202 mut f1: impl FnMut(),
203 mut f2: impl FnMut(),
204 exec_count: usize,
205 pre_exec: impl FnOnce(),
206 mut exec_status: impl FnMut(usize),
207 init_status_count: usize,
208 ) {
209 pre_exec();
210
211 for i in 1..=exec_count / 2 {
212 let pairs = duo_exec(&mut f1, &mut f2);
213
214 for (latency1, latency2) in pairs {
215 let elapsed1 = unit.latency_as_u64(latency1);
216 let elapsed2 = unit.latency_as_u64(latency2);
217 self.capture_data(elapsed1, elapsed2);
218 }
219
220 exec_status(init_status_count + i * 2);
222 }
223 }
224
225 fn warmup(
229 &mut self,
230 unit: LatencyUnit,
231 mut f1: impl FnMut(),
232 mut f2: impl FnMut(),
233 mut warmup_status: impl FnMut(usize, u64, u64),
234 ) {
235 let warmup_millis = get_warmup_millis();
236 let start = Instant::now();
237 for i in 1.. {
238 self.execute(
239 unit,
240 &mut f1,
241 &mut f2,
242 WARMUP_INCREMENT_COUNT,
243 || {},
244 |_| {},
245 0,
246 );
247 let elapsed = Instant::now().duration_since(start);
248 warmup_status(i, elapsed.as_millis() as u64, warmup_millis);
249 if elapsed.ge(&Duration::from_millis(warmup_millis)) {
250 break;
251 }
252 }
253 }
254}
255
256pub fn bench_diff_x(
280 unit: LatencyUnit,
281 mut f1: impl FnMut(),
282 mut f2: impl FnMut(),
283 exec_count: usize,
284 mut warmup_status: impl FnMut(usize, u64, u64),
285 pre_exec: impl FnOnce(),
286 mut exec_status: impl FnMut(usize),
287) -> DiffOut {
288 let exec_count2 = exec_count / 2;
289
290 let mut out = DiffOut::new();
291
292 let mut state = DiffState::new(&mut out);
293 state.warmup(unit, &mut f1, &mut f2, &mut warmup_status);
294 state.reset();
295
296 state.execute(
297 unit,
298 &mut f1,
299 &mut f2,
300 exec_count2,
301 pre_exec,
302 &mut exec_status,
303 0,
304 );
305
306 let mut state_rev = state.reversed();
307 state_rev.execute(
308 unit,
309 &mut f2,
310 &mut f1,
311 exec_count2,
312 || (),
313 &mut exec_status,
314 exec_count2,
315 );
316
317 out
318}
319
320pub fn bench_diff(
335 unit: LatencyUnit,
336 f1: impl FnMut(),
337 f2: impl FnMut(),
338 exec_count: usize,
339) -> DiffOut {
340 bench_diff_x(unit, f1, f2, exec_count, |_, _, _| {}, || (), |_| ())
341}
342
343pub fn bench_diff_with_status(
362 unit: LatencyUnit,
363 f1: impl FnMut(),
364 f2: impl FnMut(),
365 exec_count: usize,
366 header: impl FnOnce(LatencyUnit, usize),
367) -> DiffOut {
368 header(unit, exec_count);
369
370 let warmup_status = {
371 let mut status_len: usize = 0;
372
373 move |_: usize, elapsed_millis: u64, warmup_millis: u64| {
374 if status_len == 0 {
375 eprint!("Warming up ... ");
376 stderr().flush().expect("unexpected I/O error");
377 }
378 eprint!("{}", "\u{8}".repeat(status_len));
379 let status = format!("{elapsed_millis} millis of {warmup_millis}.");
380 if elapsed_millis.lt(&warmup_millis) {
381 status_len = status.len();
382 } else {
383 status_len = 0; };
385 eprint!("{status}");
386 stderr().flush().expect("unexpected I/O error");
387 }
388 };
389
390 let pre_exec = || {
391 eprint!(" Executing bench_diff ... ");
392 stderr().flush().expect("unexpected I/O error");
393 };
394
395 let exec_status = {
396 let mut status_len: usize = 0;
397
398 move |i| {
399 eprint!("{}", "\u{8}".repeat(status_len));
400 let status = format!("{i} of {exec_count}.");
401 status_len = status.len();
402 eprint!("{status}");
403 stderr().flush().expect("unexpected I/O error");
404 }
405 };
406
407 bench_diff_x(
408 unit,
409 f1,
410 f2,
411 exec_count,
412 warmup_status,
413 pre_exec,
414 exec_status,
415 )
416}
417
418#[cfg(test)]
419#[cfg(feature = "_test_support")]
420#[allow(clippy::type_complexity)]
421mod test {
422 use super::*;
423 use crate::{
424 dev_utils::nest_btree_map,
425 test_support::{
426 ALPHA, BETA, Claim, ClaimResults, HI_1PCT_FACTOR, HI_10PCT_FACTOR, HI_25PCT_FACTOR,
427 ScaleParams, default_hi_stdev_ln, default_lo_stdev_ln, get_scale_params, get_scenario,
428 },
429 };
430 use rand::{SeedableRng, distr::Distribution, prelude::StdRng};
431 use rand_distr::LogNormal;
432 use std::{fmt::Debug, ops::Deref};
433
434 #[allow(clippy::large_enum_variant)]
435 enum MyFnMut {
436 Det {
437 median: f64,
438 },
439
440 NonDet {
441 median: f64,
442 lognormal: LogNormal<f64>,
443 rng: StdRng,
444 },
445 }
446
447 impl MyFnMut {
448 fn new_deterministic(median: f64) -> Self {
449 Self::Det { median }
450 }
451
452 fn new_non_deterministic(median: f64, stdev_ln: f64) -> Self {
453 let mu = 0.0_f64;
454 let sigma = stdev_ln;
455 Self::NonDet {
456 median,
457 lognormal: LogNormal::new(mu, sigma).expect("stdev_ln must be > 0"),
458 rng: StdRng::from_rng(&mut rand::rng()),
459 }
460 }
461
462 pub fn invoke(&mut self) -> f64 {
463 match self {
464 Self::Det { median } => *median,
465
466 Self::NonDet {
467 median,
468 lognormal,
469 rng,
470 } => {
471 let factor = lognormal.sample(rng);
472 *median * factor
473 }
474 }
475 }
476 }
477
478 const NAMED_FNS: [(&str, fn(f64) -> MyFnMut); 12] = {
479 [
480 ("base_median_no_var", |base_median| {
481 MyFnMut::new_deterministic(base_median)
482 }),
483 ("hi_1pct_median_no_var", |base_median| {
484 MyFnMut::new_deterministic(base_median * HI_1PCT_FACTOR)
485 }),
486 ("hi_10pct_median_no_var", |base_median| {
487 MyFnMut::new_deterministic(base_median * HI_10PCT_FACTOR)
488 }),
489 ("hi_25pct_median_no_var", |base_median| {
490 MyFnMut::new_deterministic(base_median * HI_25PCT_FACTOR)
491 }),
492 ("base_median_lo_var", |base_median| {
493 MyFnMut::new_non_deterministic(base_median, default_lo_stdev_ln())
494 }),
495 ("hi_1pct_median_lo_var", |base_median| {
496 MyFnMut::new_non_deterministic(base_median * HI_1PCT_FACTOR, default_lo_stdev_ln())
497 }),
498 ("hi_10pct_median_lo_var", |base_median| {
499 MyFnMut::new_non_deterministic(base_median * HI_10PCT_FACTOR, default_lo_stdev_ln())
500 }),
501 ("hi_25pct_median_lo_var", |base_median| {
502 MyFnMut::new_non_deterministic(base_median * HI_25PCT_FACTOR, default_lo_stdev_ln())
503 }),
504 ("base_median_hi_var", |base_median| {
505 MyFnMut::new_non_deterministic(base_median, default_hi_stdev_ln())
506 }),
507 ("hi_1pct_median_hi_var", |base_median| {
508 MyFnMut::new_non_deterministic(base_median * HI_1PCT_FACTOR, default_hi_stdev_ln())
509 }),
510 ("hi_10pct_median_hi_var", |base_median| {
511 MyFnMut::new_non_deterministic(base_median * HI_10PCT_FACTOR, default_hi_stdev_ln())
512 }),
513 ("hi_25pct_median_hi_var", |base_median| {
514 MyFnMut::new_non_deterministic(base_median * HI_25PCT_FACTOR, default_hi_stdev_ln())
515 }),
516 ]
517 };
518
519 fn get_fn(name: &str) -> fn(f64) -> MyFnMut {
520 NAMED_FNS
521 .iter()
522 .find(|pair| pair.0 == name)
523 .unwrap_or_else(|| panic!("invalid fn name: {name}"))
524 .1
525 }
526
527 fn diff_x(
528 mut f1: impl FnMut() -> f64,
529 mut f2: impl FnMut() -> f64,
530 exec_count: usize,
531 ) -> DiffOut {
532 let mut out = DiffOut::new();
533 let mut state = DiffState::new(&mut out);
534
535 for _ in 1..=exec_count {
536 let (elapsed1, elapsed2) = (f1() as u64, f2() as u64);
537 state.capture_data(elapsed1, elapsed2);
538 }
539
540 out
541 }
542
543 fn run_with_claims<T: Deref<Target = str> + Debug>(
544 scale_params: &ScaleParams,
545 name1: T,
546 name2: T,
547 verbose: bool,
548 nrepeats: usize,
549 run_name: &str,
550 ) {
551 let print_args = || {
552 println!("*** arguments ***");
553 println!("SCALE_NAME=\"{}\"", scale_params.name);
554 println!(
555 "unit={:?}, exec_count={}, base_median={}",
556 scale_params.unit, scale_params.exec_count, scale_params.base_median
557 );
558 println!("FN_NAME_PAIR=\"({name1:?}, {name2:?})\"");
559 println!("VERBOSE=\"{verbose}\"");
560 println!("nrepeats={nrepeats}");
561 println!("run_name=\"{run_name}\"");
562 };
563
564 println!();
565 print_args();
566 println!();
567
568 let scenario = get_scenario(&name1, &name2);
569
570 let mut f1 = {
571 let mut my_fn = get_fn(&name1)(scale_params.base_median);
572 move || my_fn.invoke()
573 };
574
575 let mut f2 = {
576 let mut my_fn = get_fn(&name2)(scale_params.base_median);
577 move || my_fn.invoke()
578 };
579
580 let mut results = ClaimResults::new();
581
582 for _ in 1..=nrepeats {
583 let diff_out = diff_x(&mut f1, &mut f2, scale_params.exec_count);
584 scenario.check_claims(&mut results, &diff_out, verbose);
585 }
586
587 if verbose {
588 println!("*** failures ***");
589 for claim_result in results.failures().iter() {
590 println!("{claim_result:?}");
591 }
592
593 println!();
594 println!("*** failure_summary ***");
595 for ((name_pair, claim_name), count) in results.failure_summary() {
596 println!("{name_pair:?} | {claim_name} ==> count={count}");
597 }
598
599 println!();
600 println!("*** success_summary ***");
601 for (name_pair, claim_name) in results.success_summary() {
602 println!("{name_pair:?} | {claim_name}");
603 }
604 } else {
605 println!("*** claim_summary ***");
606 for ((name_pair, claim_name), count) in results.summary() {
607 println!("{name_pair:?} | {claim_name} ==> count={count}");
608 }
609 }
610
611 let type_i_and_ii_errors_95 = results.excess_type_i_and_ii_errors(
612 ALPHA,
613 BETA,
614 &Claim::CRITICAL_NAMES,
615 nrepeats,
616 0.95,
617 );
618 assert!(
619 type_i_and_ii_errors_95.is_empty(),
620 "\n*** type_i_and_ii_errors_95: {:?}\n",
621 nest_btree_map(type_i_and_ii_errors_95)
622 );
623 }
624
625 const SCALE_NAMES: [&str; 1] = [
626 "micros_scale",
627 ];
630
631 #[test]
632 fn test_base_median_lo_var_base_median_lo_var() {
633 for name in SCALE_NAMES {
634 let scale = get_scale_params(name);
635 run_with_claims(
636 scale,
637 "base_median_lo_var",
638 "base_median_lo_var",
639 false,
640 300,
641 "test",
642 );
643 }
644 }
645
646 #[test]
647 fn test_base_median_lo_var_base_median_hi_var() {
648 for name in SCALE_NAMES {
649 let scale = get_scale_params(name);
650 run_with_claims(
651 scale,
652 "base_median_lo_var",
653 "base_median_hi_var",
654 false,
655 300,
656 "test",
657 );
658 }
659 }
660
661 #[test]
662 fn test_base_median_lo_var_hi_1pct_median_lo_var() {
663 for name in SCALE_NAMES {
664 let scale = get_scale_params(name);
665 run_with_claims(
666 scale,
667 "base_median_lo_var",
668 "hi_1pct_median_lo_var",
669 false,
670 100,
671 "test",
672 );
673 }
674 }
675
676 #[test]
677 fn test_base_median_lo_var_hi_10pct_median_lo_var() {
678 for name in SCALE_NAMES {
679 let scale = get_scale_params(name);
680 run_with_claims(
681 scale,
682 "base_median_lo_var",
683 "hi_10pct_median_lo_var",
684 false,
685 100,
686 "test",
687 );
688 }
689 }
690
691 #[test]
692 fn test_base_median_lo_var_hi_25pct_median_lo_var() {
693 for name in SCALE_NAMES {
694 let scale = get_scale_params(name);
695 run_with_claims(
696 scale,
697 "base_median_lo_var",
698 "hi_25pct_median_lo_var",
699 false,
700 100,
701 "test",
702 );
703 }
704 }
705
706 #[test]
723 fn test_base_median_lo_var_hi_10pct_median_hi_var() {
724 for name in SCALE_NAMES {
725 let scale = get_scale_params(name);
726 run_with_claims(
727 scale,
728 "base_median_lo_var",
729 "hi_10pct_median_hi_var",
730 false,
731 100,
732 "test",
733 );
734 }
735 }
736
737 #[test]
738 fn test_base_median_lo_var_hi_25pct_median_hi_var() {
739 for name in SCALE_NAMES {
740 let scale = get_scale_params(name);
741 run_with_claims(
742 scale,
743 "base_median_lo_var",
744 "hi_25pct_median_hi_var",
745 false,
746 100,
747 "test",
748 );
749 }
750 }
751}