1use serde::{Deserialize, Serialize};
66use std::cell::RefCell;
67use std::sync::{
68 Arc,
69 atomic::{AtomicBool, AtomicU64, Ordering},
70 mpsc,
71};
72use std::thread::{self, JoinHandle};
73use std::time::{Duration, Instant};
74use thiserror::Error;
75
/// Configuration for one benchmark run: a name plus iteration counts.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSpec {
    /// Human-readable benchmark identifier.
    pub name: String,

    /// Number of measured iterations; must be non-zero (enforced by `BenchSpec::new`).
    pub iterations: u32,

    /// Number of untimed warmup iterations run before measurement begins.
    pub warmup: u32,
}
132
133impl BenchSpec {
134 pub fn new(name: impl Into<String>, iterations: u32, warmup: u32) -> Result<Self, TimingError> {
160 if iterations == 0 {
161 return Err(TimingError::NoIterations { count: iterations });
162 }
163
164 Ok(Self {
165 name: name.into(),
166 iterations,
167 warmup,
168 })
169 }
170}
171
/// One measured iteration: wall-clock time plus optional resource readings.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct BenchSample {
    /// Wall-clock duration of the iteration in nanoseconds.
    pub duration_ns: u64,

    /// CPU time (user + system) consumed during the iteration, in
    /// milliseconds; `None` on platforms without CPU-time support.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_time_ms: Option<u64>,

    /// Peak resident memory above the pre-iteration baseline, in KiB;
    /// `None` when the memory sampler was unavailable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub peak_memory_kb: Option<u64>,
}
212
213impl BenchSample {
214 fn from_measurement(duration: Duration, resources: IterationResourceUsage) -> Self {
215 Self {
216 duration_ns: duration.as_nanos() as u64,
217 cpu_time_ms: resources.cpu_time_ms,
218 peak_memory_kb: resources.peak_memory_kb,
219 }
220 }
221}
222
/// Full result of a benchmark run: spec, raw samples, and timing breakdowns.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchReport {
    /// The spec the run was executed with.
    pub spec: BenchSpec,

    /// One sample per measured iteration (warmup iterations are excluded).
    pub samples: Vec<BenchSample>,

    /// Aggregated `profile_phase` durations recorded during measured iterations.
    pub phases: Vec<SemanticPhase>,

    /// Harness-level spans (setup, warmup, measured, teardown) relative to
    /// the run's origin instant.
    pub timeline: Vec<HarnessTimelineSpan>,
}
267
/// One span on the harness timeline, with offsets in nanoseconds from the
/// run's origin instant.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct HarnessTimelineSpan {
    /// Span label, e.g. "setup", "warmup-benchmark", "measured-benchmark".
    pub phase: String,
    /// Start offset from the harness origin, in nanoseconds.
    pub start_offset_ns: u64,
    /// End offset from the harness origin, in nanoseconds.
    pub end_offset_ns: u64,
    /// Iteration index for per-iteration spans; `None` for one-off phases
    /// such as setup and teardown.
    pub iteration: Option<u32>,
}
275
276impl BenchReport {
277 #[must_use]
279 pub fn mean_ns(&self) -> f64 {
280 if self.samples.is_empty() {
281 return 0.0;
282 }
283 let sum: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
284 sum as f64 / self.samples.len() as f64
285 }
286
287 #[must_use]
289 pub fn median_ns(&self) -> f64 {
290 if self.samples.is_empty() {
291 return 0.0;
292 }
293 let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
294 sorted.sort_unstable();
295 let len = sorted.len();
296 if len % 2 == 0 {
297 (sorted[len / 2 - 1] + sorted[len / 2]) as f64 / 2.0
298 } else {
299 sorted[len / 2] as f64
300 }
301 }
302
303 #[must_use]
305 pub fn std_dev_ns(&self) -> f64 {
306 if self.samples.len() < 2 {
307 return 0.0;
308 }
309 let mean = self.mean_ns();
310 let variance: f64 = self
311 .samples
312 .iter()
313 .map(|s| {
314 let diff = s.duration_ns as f64 - mean;
315 diff * diff
316 })
317 .sum::<f64>()
318 / (self.samples.len() - 1) as f64;
319 variance.sqrt()
320 }
321
322 #[must_use]
324 pub fn percentile_ns(&self, p: f64) -> f64 {
325 if self.samples.is_empty() {
326 return 0.0;
327 }
328 let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
329 sorted.sort_unstable();
330 let p = p.clamp(0.0, 100.0) / 100.0;
331 let index = (p * (sorted.len() - 1) as f64).round() as usize;
332 sorted[index.min(sorted.len() - 1)] as f64
333 }
334
335 #[must_use]
337 pub fn min_ns(&self) -> u64 {
338 self.samples
339 .iter()
340 .map(|s| s.duration_ns)
341 .min()
342 .unwrap_or(0)
343 }
344
345 #[must_use]
347 pub fn max_ns(&self) -> u64 {
348 self.samples
349 .iter()
350 .map(|s| s.duration_ns)
351 .max()
352 .unwrap_or(0)
353 }
354
355 #[must_use]
357 pub fn cpu_total_ms(&self) -> Option<u64> {
358 let values = self
359 .samples
360 .iter()
361 .filter_map(|sample| sample.cpu_time_ms)
362 .collect::<Vec<_>>();
363 if values.is_empty() {
364 return None;
365 }
366
367 let total = values
368 .iter()
369 .fold(0_u128, |sum, value| sum.saturating_add(u128::from(*value)));
370 Some(total.min(u128::from(u64::MAX)) as u64)
371 }
372
373 #[must_use]
375 pub fn cpu_median_ms(&self) -> Option<u64> {
376 let mut values = self
377 .samples
378 .iter()
379 .filter_map(|sample| sample.cpu_time_ms)
380 .collect::<Vec<_>>();
381 if values.is_empty() {
382 return None;
383 }
384
385 values.sort_unstable();
386 let len = values.len();
387 Some(if len % 2 == 0 {
388 let lower = u128::from(values[(len / 2) - 1]);
389 let upper = u128::from(values[len / 2]);
390 ((lower + upper) / 2) as u64
391 } else {
392 values[len / 2]
393 })
394 }
395
396 #[must_use]
398 pub fn peak_memory_kb(&self) -> Option<u64> {
399 self.samples
400 .iter()
401 .filter_map(|sample| sample.peak_memory_kb)
402 .max()
403 }
404
405 #[must_use]
407 pub fn summary(&self) -> BenchSummary {
408 BenchSummary {
409 name: self.spec.name.clone(),
410 iterations: self.samples.len() as u32,
411 warmup: self.spec.warmup,
412 mean_ns: self.mean_ns(),
413 median_ns: self.median_ns(),
414 std_dev_ns: self.std_dev_ns(),
415 min_ns: self.min_ns(),
416 max_ns: self.max_ns(),
417 p95_ns: self.percentile_ns(95.0),
418 p99_ns: self.percentile_ns(99.0),
419 }
420 }
421}
422
/// Resource readings captured around a single measured iteration.
#[derive(Clone, Debug, Default)]
struct IterationResourceUsage {
    // CPU time (user + system) in milliseconds, when available.
    cpu_time_ms: Option<u64>,
    // Peak resident memory above the baseline in KiB, when available.
    peak_memory_kb: Option<u64>,
}
428
/// Nanoseconds elapsed from `origin` to `instant`, clamped to `u64::MAX`.
fn instant_offset_ns(origin: Instant, instant: Instant) -> u64 {
    let elapsed = instant.duration_since(origin).as_nanos();
    elapsed.min(u128::from(u64::MAX)) as u64
}
435
436fn push_timeline_span(
437 timeline: &mut Vec<HarnessTimelineSpan>,
438 origin: Instant,
439 phase: &str,
440 started_at: Instant,
441 ended_at: Instant,
442 iteration: Option<u32>,
443) {
444 timeline.push(HarnessTimelineSpan {
445 phase: phase.to_string(),
446 start_offset_ns: instant_offset_ns(origin, started_at),
447 end_offset_ns: instant_offset_ns(origin, ended_at),
448 iteration,
449 });
450}
451
/// Flattened timing statistics derived from a `BenchReport`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSummary {
    /// Benchmark name copied from the spec.
    pub name: String,
    /// Number of collected samples.
    pub iterations: u32,
    /// Warmup iteration count from the spec.
    pub warmup: u32,
    /// Mean duration in nanoseconds.
    pub mean_ns: f64,
    /// Median duration in nanoseconds.
    pub median_ns: f64,
    /// Sample standard deviation in nanoseconds.
    pub std_dev_ns: f64,
    /// Fastest sample in nanoseconds.
    pub min_ns: u64,
    /// Slowest sample in nanoseconds.
    pub max_ns: u64,
    /// 95th-percentile duration in nanoseconds.
    pub p95_ns: f64,
    /// 99th-percentile duration in nanoseconds.
    pub p99_ns: f64,
}
476
/// Aggregated duration of one named `profile_phase` across measured iterations.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticPhase {
    /// Phase name as passed to `profile_phase`.
    pub name: String,
    /// Total time spent in the phase, in nanoseconds.
    pub duration_ns: u64,
}
483
/// Thread-local accumulator behind `profile_phase`; only records top-level
/// phases, and only while `enabled` (i.e. during measured iterations).
#[derive(Default)]
struct SemanticPhaseCollector {
    // True while measured iterations are running; warmup leaves this false.
    enabled: bool,
    // Current phase-nesting depth; nested phases fold into their parent.
    depth: usize,
    // Accumulated phases, merged by name.
    phases: Vec<SemanticPhase>,
}
490
491impl SemanticPhaseCollector {
492 fn reset(&mut self) {
493 self.enabled = false;
494 self.depth = 0;
495 self.phases.clear();
496 }
497
498 fn begin_measurement(&mut self) {
499 self.reset();
500 self.enabled = true;
501 }
502
503 fn finish(&mut self) -> Vec<SemanticPhase> {
504 self.enabled = false;
505 self.depth = 0;
506 std::mem::take(&mut self.phases)
507 }
508
509 fn enter_phase(&mut self) -> Option<bool> {
510 if !self.enabled {
511 return None;
512 }
513 let top_level = self.depth == 0;
514 self.depth += 1;
515 Some(top_level)
516 }
517
518 fn exit_phase(&mut self, name: &str, top_level: bool, elapsed: Duration) {
519 self.depth = self.depth.saturating_sub(1);
520 if !self.enabled || !top_level {
521 return;
522 }
523
524 let duration_ns = elapsed.as_nanos().min(u128::from(u64::MAX)) as u64;
525 if let Some(phase) = self.phases.iter_mut().find(|phase| phase.name == name) {
526 phase.duration_ns = phase.duration_ns.saturating_add(duration_ns);
527 } else {
528 self.phases.push(SemanticPhase {
529 name: name.to_string(),
530 duration_ns,
531 });
532 }
533 }
534}
535
thread_local! {
    // Per-thread collector backing `profile_phase`; reset and drained by the
    // run_closure* harness functions on the same thread.
    static SEMANTIC_PHASE_COLLECTOR: RefCell<SemanticPhaseCollector> =
        RefCell::new(SemanticPhaseCollector::default());
}
540
/// RAII guard created by `profile_phase`; records the phase duration on drop.
struct SemanticPhaseGuard {
    // Phase name; empty for an inert guard (collection disabled).
    name: String,
    // Entry time; `None` makes the guard a no-op on drop.
    started_at: Option<Instant>,
    // Whether this phase was entered at depth 0 (only those are recorded).
    top_level: bool,
}
546
547impl Drop for SemanticPhaseGuard {
548 fn drop(&mut self) {
549 let Some(started_at) = self.started_at else {
550 return;
551 };
552
553 let elapsed = started_at.elapsed();
554 SEMANTIC_PHASE_COLLECTOR.with(|collector| {
555 collector
556 .borrow_mut()
557 .exit_phase(&self.name, self.top_level, elapsed);
558 });
559 }
560}
561
/// Clears the current thread's phase collector and disables collection.
fn reset_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().reset());
}
565
/// Enables phase collection on the current thread, discarding prior phases.
fn begin_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().begin_measurement());
}
569
/// Disables phase collection and returns the phases recorded on this thread.
fn finish_semantic_phase_collection() -> Vec<SemanticPhase> {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().finish())
}
573
/// Captures per-iteration resource usage around the measured closure.
/// `start` is called immediately before an iteration and `finish` right
/// after, pairing each call via the opaque `Token`.
trait ResourceMonitor {
    type Token;

    /// Begins resource capture for one iteration.
    fn start(&mut self) -> Self::Token;

    /// Ends capture for the iteration started with `token` and returns
    /// the observed usage.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage;
}
581
/// Production `ResourceMonitor`: process CPU time via `getrusage` plus a
/// background memory sampler, where the platform supports them.
#[derive(Default)]
struct DefaultResourceMonitor;
584
/// Point-in-time reading of the process's accumulated CPU time.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessCpuTimeSnapshot {
    // Accumulated user-mode CPU time in nanoseconds.
    user_ns: u64,
    // Accumulated kernel-mode CPU time in nanoseconds.
    system_ns: u64,
}
590
impl ProcessCpuTimeSnapshot {
    /// Builds a snapshot from the `ru_utime`/`ru_stime` timevals returned by
    /// `getrusage`; `None` if either timeval has out-of-range fields.
    #[cfg(unix)]
    fn from_rusage_timevals(user: libc::timeval, system: libc::timeval) -> Option<Self> {
        Some(Self {
            user_ns: timeval_to_ns(user)?,
            system_ns: timeval_to_ns(system)?,
        })
    }

    /// Combined user + system CPU time in nanoseconds, saturating on overflow.
    fn total_ns(self) -> u64 {
        self.user_ns.saturating_add(self.system_ns)
    }
}
604
/// Per-iteration state held between `start` and `finish` of the default monitor.
struct DefaultResourceToken {
    // CPU-time snapshot taken at iteration start; `None` if unavailable.
    cpu_time_start: Option<ProcessCpuTimeSnapshot>,
    // Running background memory sampler; `None` if it failed to start.
    memory_sampler: Option<MemoryPeakSampler>,
}
609
610impl ResourceMonitor for DefaultResourceMonitor {
611 type Token = DefaultResourceToken;
612
613 fn start(&mut self) -> Self::Token {
614 Self::Token {
615 cpu_time_start: current_process_cpu_time(),
616 memory_sampler: MemoryPeakSampler::start(),
617 }
618 }
619
620 fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
621 let cpu_time_ms = token
622 .cpu_time_start
623 .zip(current_process_cpu_time())
624 .and_then(|(start, end)| process_cpu_delta_ms(start, end));
625
626 IterationResourceUsage {
627 cpu_time_ms,
628 peak_memory_kb: token
629 .memory_sampler
630 .and_then(MemoryPeakSampler::stop)
631 .filter(|value| *value > 0),
632 }
633 }
634}
635
/// Converts nanoseconds to milliseconds, rounding half up.
/// The addition is done in `u128` so `ns` near `u64::MAX` cannot overflow.
fn round_ns_to_ms(ns: u64) -> u64 {
    let biased = u128::from(ns) + 500_000;
    (biased / 1_000_000) as u64
}
639
640#[cfg(unix)]
641fn process_cpu_delta_ms(start: ProcessCpuTimeSnapshot, end: ProcessCpuTimeSnapshot) -> Option<u64> {
642 Some(round_ns_to_ms(
643 end.total_ns().checked_sub(start.total_ns())?,
644 ))
645}
646
/// CPU-time deltas are not implemented off unix; always reports `None`.
#[cfg(not(unix))]
fn process_cpu_delta_ms(
    _start: ProcessCpuTimeSnapshot,
    _end: ProcessCpuTimeSnapshot,
) -> Option<u64> {
    None
}
654
/// Converts a libc `timeval` (seconds + microseconds) to nanoseconds,
/// saturating on overflow. Returns `None` for negative fields.
#[cfg(unix)]
fn timeval_to_ns(value: libc::timeval) -> Option<u64> {
    let secs = u64::try_from(value.tv_sec).ok()?;
    let micros = u64::try_from(value.tv_usec).ok()?;
    Some(
        secs.saturating_mul(1_000_000_000)
            .saturating_add(micros.saturating_mul(1_000)),
    )
}
664
/// Reads this process's accumulated user + system CPU time via `getrusage`.
/// Returns `None` if the syscall fails or a timeval is out of range.
#[cfg(unix)]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::uninit();
    // SAFETY: `usage.as_mut_ptr()` points to writable storage sized for a
    // `libc::rusage`, as getrusage requires.
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }

    // SAFETY: getrusage returned 0, so it fully initialized `usage`.
    let usage = unsafe { usage.assume_init() };
    ProcessCpuTimeSnapshot::from_rusage_timevals(usage.ru_utime, usage.ru_stime)
}
676
/// CPU-time sampling is not implemented off unix; always reports `None`.
#[cfg(not(unix))]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    None
}
681
/// Polling cadence of the background memory-sampler thread.
const MEMORY_SAMPLER_INTERVAL: Duration = Duration::from_millis(1);
/// Shareable callback returning the process's current memory in KiB, when available.
type MemoryReader = Arc<dyn Fn() -> Option<u64> + Send + Sync + 'static>;
684
/// Background thread that periodically samples process memory and tracks
/// the peak observed during one iteration.
struct MemoryPeakSampler {
    // Memory reading taken just before the iteration; subtracted from the peak.
    baseline_kb: u64,
    // Set to true to ask the sampler thread to exit.
    stop_flag: Arc<AtomicBool>,
    // Maximum reading observed so far, shared with the sampler thread.
    peak_kb: Arc<AtomicU64>,
    // Handle used to join the sampler thread on stop.
    handle: JoinHandle<()>,
}
691
impl MemoryPeakSampler {
    /// Starts a sampler backed by the platform memory reader; `None` when the
    /// platform cannot report memory or the thread cannot be spawned.
    fn start() -> Option<Self> {
        Self::start_with_reader(Arc::new(|| current_process_memory_kb()))
    }

    /// Starts the sampler with an injectable reader (test seam).
    ///
    /// Handshake: the thread warms the reader and signals `ready`; the caller
    /// then takes the baseline reading itself and sends it over `baseline_tx`
    /// so the peak starts at (not below) the baseline. Any failure tears the
    /// thread down and returns `None`.
    fn start_with_reader(reader: MemoryReader) -> Option<Self> {
        let stop_flag = Arc::new(AtomicBool::new(false));
        let peak_kb = Arc::new(AtomicU64::new(0));
        let (ready_tx, ready_rx) = mpsc::sync_channel(1);
        let (baseline_tx, baseline_rx) = mpsc::sync_channel(1);
        let sampler_stop = Arc::clone(&stop_flag);
        let sampler_peak = Arc::clone(&peak_kb);
        let sampler_reader = Arc::clone(&reader);

        let handle = thread::Builder::new()
            .name("mobench-memory-sampler".to_string())
            .spawn(move || {
                // Warm up the reader (first call may be slower), then tell
                // the spawner we are alive.
                let _ = sampler_reader();
                let _ = ready_tx.send(());

                // A `None` baseline (or dropped sender) aborts sampling.
                let Some(baseline_kb) = baseline_rx.recv().ok().flatten() else {
                    return;
                };
                sampler_peak.store(baseline_kb, Ordering::Release);

                // Poll until asked to stop, folding each reading into the max.
                while !sampler_stop.load(Ordering::Acquire) {
                    if let Some(current_kb) = sampler_reader() {
                        update_atomic_max(&sampler_peak, current_kb);
                    }
                    thread::sleep(MEMORY_SAMPLER_INTERVAL);
                }

                // One final sample so a spike right before stop is not missed.
                if let Some(current_kb) = sampler_reader() {
                    update_atomic_max(&sampler_peak, current_kb);
                }
            })
            .ok()?;

        // Wait for the thread to come up; a closed channel means it died.
        if ready_rx.recv().is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        let baseline_kb = match reader() {
            Some(value) => value,
            None => {
                // Reader cannot produce values: unblock and join the thread.
                let _ = baseline_tx.send(None);
                stop_flag.store(true, Ordering::Release);
                let _ = handle.join();
                return None;
            }
        };
        if baseline_tx.send(Some(baseline_kb)).is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        Some(Self {
            baseline_kb,
            stop_flag,
            peak_kb,
            handle,
        })
    }

    /// Stops the sampler thread and returns the peak memory growth above the
    /// baseline, in KiB (0 if memory never rose above the baseline).
    fn stop(self) -> Option<u64> {
        self.stop_flag.store(true, Ordering::Release);
        let _ = self.handle.join();
        let peak_kb = self.peak_kb.load(Ordering::Acquire);
        Some(peak_kb.saturating_sub(self.baseline_kb))
    }
}
769
/// Lock-free maximum: raises `target` to `value` when `value` is larger,
/// retrying the CAS until it wins or another thread stores something bigger.
fn update_atomic_max(target: &AtomicU64, value: u64) {
    let mut observed = target.load(Ordering::Relaxed);
    loop {
        if value <= observed {
            return;
        }
        match target.compare_exchange_weak(observed, value, Ordering::Relaxed, Ordering::Relaxed) {
            Ok(_) => return,
            Err(actual) => observed = actual,
        }
    }
}
779
/// Resident set size in KiB, computed from `/proc/self/statm`: the second
/// field (resident pages) times the system page size.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn current_process_memory_kb() -> Option<u64> {
    let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
    let resident_pages = statm
        .split_whitespace()
        .nth(1)
        .and_then(|value| value.parse::<u64>().ok())?;
    // SAFETY: sysconf(_SC_PAGESIZE) takes no pointers and has no
    // memory-safety preconditions.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if page_size <= 0 {
        return None;
    }
    let page_size = u64::try_from(page_size).ok()?;
    Some(resident_pages.saturating_mul(page_size) / 1024)
}
794
/// Resident set size in KiB on Darwin, read via `task_info` with
/// `MACH_TASK_BASIC_INFO` (`resident_size` is in bytes).
#[cfg(any(target_os = "ios", target_os = "macos"))]
fn current_process_memory_kb() -> Option<u64> {
    let mut info = std::mem::MaybeUninit::<libc::mach_task_basic_info_data_t>::uninit();
    let mut count = libc::MACH_TASK_BASIC_INFO_COUNT;
    #[allow(deprecated)]
    // SAFETY: `info` provides writable storage for a
    // `mach_task_basic_info_data_t` and `count` holds its size in
    // `integer_t` units, as task_info requires.
    let rc = unsafe {
        libc::task_info(
            libc::mach_task_self(),
            libc::MACH_TASK_BASIC_INFO,
            info.as_mut_ptr().cast::<libc::integer_t>(),
            &mut count,
        )
    };
    if rc != libc::KERN_SUCCESS {
        return None;
    }

    // SAFETY: task_info returned KERN_SUCCESS, so `info` was initialized.
    let info = unsafe { info.assume_init() };
    Some((info.resident_size / 1024) as u64)
}
815
/// Memory sampling is not implemented on this platform; always `None`.
#[cfg(not(any(
    target_os = "android",
    target_os = "linux",
    target_os = "ios",
    target_os = "macos"
)))]
fn current_process_memory_kb() -> Option<u64> {
    None
}
825
826fn measure_iteration<M, F>(
827 monitor: &mut M,
828 f: F,
829) -> Result<(BenchSample, Instant, Instant), TimingError>
830where
831 M: ResourceMonitor,
832 F: FnOnce() -> Result<(), TimingError>,
833{
834 let token = monitor.start();
835 let started_at = Instant::now();
836 let result = f();
837 let ended_at = Instant::now();
838 let resources = monitor.finish(token);
839 result.map(|_| {
840 (
841 BenchSample::from_measurement(ended_at.duration_since(started_at), resources),
842 started_at,
843 ended_at,
844 )
845 })
846}
847
848pub fn profile_phase<T>(name: &str, f: impl FnOnce() -> T) -> T {
853 let guard = SEMANTIC_PHASE_COLLECTOR.with(|collector| {
854 let mut collector = collector.borrow_mut();
855 match collector.enter_phase() {
856 Some(top_level) => SemanticPhaseGuard {
857 name: name.to_string(),
858 started_at: Some(Instant::now()),
859 top_level,
860 },
861 None => SemanticPhaseGuard {
862 name: String::new(),
863 started_at: None,
864 top_level: false,
865 },
866 }
867 });
868
869 let result = f();
870 drop(guard);
871 result
872}
873
/// Errors produced by benchmark configuration and execution.
#[derive(Debug, Error)]
pub enum TimingError {
    /// The spec requested zero measured iterations.
    #[error("iterations must be greater than zero (got {count}). Minimum recommended: 10")]
    NoIterations {
        /// The rejected iteration count.
        count: u32,
    },

    /// The benchmarked closure itself returned an error.
    #[error("benchmark function failed: {0}")]
    Execution(String),
}
903
904pub fn run_closure<F>(spec: BenchSpec, mut f: F) -> Result<BenchReport, TimingError>
966where
967 F: FnMut() -> Result<(), TimingError>,
968{
969 let mut monitor = DefaultResourceMonitor;
970 run_closure_with_monitor(spec, &mut monitor, move || f())
971}
972
/// Core harness: runs warmup iterations (untimed samples, timeline only),
/// then the measured iterations with resource capture and semantic-phase
/// collection, and assembles the report.
///
/// # Errors
/// `NoIterations` for a zero-iteration spec; otherwise the first error
/// returned by `f` (warmup or measured).
fn run_closure_with_monitor<F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    F: FnMut() -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    // Re-validated here because this path is also reachable with a
    // hand-constructed (deserialized) spec that bypassed `BenchSpec::new`.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // Warmup: executed and recorded on the timeline, but no samples and no
    // resource monitoring (collector is still disabled here).
    for iteration in 0..spec.warmup {
        let phase_start = Instant::now();
        f()?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let (sample, start, end) = match measure_iteration(monitor, || f()) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Drain the thread-local collector so a failed run does not
                // leak phases into a later run on this thread.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1036
1037pub fn run_closure_with_setup<S, T, F>(
1065 spec: BenchSpec,
1066 setup: S,
1067 mut f: F,
1068) -> Result<BenchReport, TimingError>
1069where
1070 S: FnOnce() -> T,
1071 F: FnMut(&T) -> Result<(), TimingError>,
1072{
1073 let mut monitor = DefaultResourceMonitor;
1074 run_closure_with_setup_with_monitor(spec, &mut monitor, setup, move |input| f(input))
1075}
1076
/// Harness variant with a once-per-run fixture: `setup` builds the input
/// (recorded as a "setup" span), then warmup and measured iterations borrow
/// it. Only measured iterations produce samples and phases.
///
/// # Errors
/// `NoIterations` for a zero-iteration spec; otherwise the first error from `f`.
fn run_closure_with_setup_with_monitor<S, T, F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnOnce() -> T,
    F: FnMut(&T) -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    // Re-validated for specs constructed without `BenchSpec::new`.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // One-off fixture construction, outside all measurement.
    let setup_start = Instant::now();
    let input = setup();
    push_timeline_span(
        &mut timeline,
        harness_origin,
        "setup",
        setup_start,
        Instant::now(),
        None,
    );

    // Warmup: timeline spans only, no samples, phase collection disabled.
    for iteration in 0..spec.warmup {
        let phase_start = Instant::now();
        f(&input)?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Drain the collector so the failed run leaves no stale state.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1154
1155pub fn run_closure_with_setup_per_iter<S, T, F>(
1184 spec: BenchSpec,
1185 mut setup: S,
1186 mut f: F,
1187) -> Result<BenchReport, TimingError>
1188where
1189 S: FnMut() -> T,
1190 F: FnMut(T) -> Result<(), TimingError>,
1191{
1192 let mut monitor = DefaultResourceMonitor;
1193 run_closure_with_setup_per_iter_with_monitor(
1194 spec,
1195 &mut monitor,
1196 move || setup(),
1197 move |input| f(input),
1198 )
1199}
1200
/// Harness variant with a per-iteration fixture: every warmup and measured
/// iteration gets a fresh `setup()` value (recorded as a "fixture-setup"
/// span, excluded from the measured time) which `f` consumes by value.
///
/// # Errors
/// `NoIterations` for a zero-iteration spec; otherwise the first error from `f`.
fn run_closure_with_setup_per_iter_with_monitor<S, T, F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    mut setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnMut() -> T,
    F: FnMut(T) -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    // Re-validated for specs constructed without `BenchSpec::new`.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // Warmup: fixture build + run, timeline spans only, no samples.
    for iteration in 0..spec.warmup {
        let setup_start = Instant::now();
        let input = setup();
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "fixture-setup",
            setup_start,
            Instant::now(),
            Some(iteration),
        );
        let phase_start = Instant::now();
        f(input)?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        // Build the fixture outside the measured window.
        let setup_start = Instant::now();
        let input = setup(); push_timeline_span(
            &mut timeline,
            harness_origin,
            "fixture-setup",
            setup_start,
            Instant::now(),
            Some(iteration),
        );

        let (sample, start, end) = match measure_iteration(monitor, || f(input)) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Drain the collector so the failed run leaves no stale state.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1287
1288pub fn run_closure_with_setup_teardown<S, T, F, D>(
1317 spec: BenchSpec,
1318 setup: S,
1319 mut f: F,
1320 teardown: D,
1321) -> Result<BenchReport, TimingError>
1322where
1323 S: FnOnce() -> T,
1324 F: FnMut(&T) -> Result<(), TimingError>,
1325 D: FnOnce(T),
1326{
1327 let mut monitor = DefaultResourceMonitor;
1328 run_closure_with_setup_teardown_with_monitor(
1329 spec,
1330 &mut monitor,
1331 setup,
1332 move |input| f(input),
1333 teardown,
1334 )
1335}
1336
/// Harness variant with a once-per-run fixture and a final teardown. Flow:
/// setup ("setup" span) → warmup → measured iterations → teardown
/// ("teardown" span). Only measured iterations produce samples and phases.
/// Note: an early error return from `f` skips `teardown`.
///
/// # Errors
/// `NoIterations` for a zero-iteration spec; otherwise the first error from `f`.
fn run_closure_with_setup_teardown_with_monitor<S, T, F, D, M>(
    spec: BenchSpec,
    monitor: &mut M,
    setup: S,
    mut f: F,
    teardown: D,
) -> Result<BenchReport, TimingError>
where
    S: FnOnce() -> T,
    F: FnMut(&T) -> Result<(), TimingError>,
    D: FnOnce(T),
    M: ResourceMonitor,
{
    // Re-validated for specs constructed without `BenchSpec::new`.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // One-off fixture construction, outside all measurement.
    let setup_start = Instant::now();
    let input = setup();
    push_timeline_span(
        &mut timeline,
        harness_origin,
        "setup",
        setup_start,
        Instant::now(),
        None,
    );

    // Warmup: timeline spans only, no samples, phase collection disabled.
    for iteration in 0..spec.warmup {
        let phase_start = Instant::now();
        f(&input)?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Drain the collector so the failed run leaves no stale state.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    // Teardown runs after all measurement, outside the samples.
    let teardown_start = Instant::now();
    teardown(input);
    push_timeline_span(
        &mut timeline,
        harness_origin,
        "teardown",
        teardown_start,
        Instant::now(),
        None,
    );

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1428
1429#[cfg(test)]
1430mod tests {
1431 use super::*;
1432
1433 #[derive(Default)]
1434 struct FakeResourceMonitor {
1435 samples: Vec<IterationResourceUsage>,
1436 started: usize,
1437 finished: usize,
1438 }
1439
1440 impl FakeResourceMonitor {
1441 fn new(samples: Vec<IterationResourceUsage>) -> Self {
1442 Self {
1443 samples,
1444 started: 0,
1445 finished: 0,
1446 }
1447 }
1448 }
1449
1450 impl ResourceMonitor for FakeResourceMonitor {
1451 type Token = usize;
1452
1453 fn start(&mut self) -> Self::Token {
1454 let token = self.started;
1455 self.started += 1;
1456 assert!(
1457 token < self.samples.len(),
1458 "resource capture should only run for measured iterations"
1459 );
1460 token
1461 }
1462
1463 fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
1464 self.finished += 1;
1465 self.samples
1466 .get(token)
1467 .cloned()
1468 .expect("resource usage for measured iteration")
1469 }
1470 }
1471
1472 #[cfg(unix)]
1473 #[test]
1474 fn process_cpu_time_snapshot_sums_user_and_kernel_time() {
1475 let snapshot = ProcessCpuTimeSnapshot::from_rusage_timevals(
1476 libc::timeval {
1477 tv_sec: 1,
1478 tv_usec: 250_000,
1479 },
1480 libc::timeval {
1481 tv_sec: 0,
1482 tv_usec: 750_000,
1483 },
1484 )
1485 .expect("valid snapshot");
1486
1487 assert_eq!(snapshot.total_ns(), 2_000_000_000);
1488 }
1489
1490 #[cfg(unix)]
1491 #[test]
1492 fn process_cpu_time_delta_ms_uses_user_and_kernel_time() {
1493 let start = ProcessCpuTimeSnapshot::from_rusage_timevals(
1494 libc::timeval {
1495 tv_sec: 1,
1496 tv_usec: 250_000,
1497 },
1498 libc::timeval {
1499 tv_sec: 0,
1500 tv_usec: 750_000,
1501 },
1502 )
1503 .expect("valid start snapshot");
1504 let end = ProcessCpuTimeSnapshot::from_rusage_timevals(
1505 libc::timeval {
1506 tv_sec: 1,
1507 tv_usec: 900_000,
1508 },
1509 libc::timeval {
1510 tv_sec: 1,
1511 tv_usec: 400_600,
1512 },
1513 )
1514 .expect("valid end snapshot");
1515
1516 assert_eq!(process_cpu_delta_ms(start, end), Some(1_301));
1517 }
1518
1519 #[test]
1520 fn runs_benchmark_collects_requested_samples() {
1521 let spec = BenchSpec::new("noop", 3, 1).unwrap();
1522 let report = run_closure(spec, || Ok(())).unwrap();
1523
1524 assert_eq!(report.samples.len(), 3);
1525 assert_eq!(report.spec.name, "noop");
1526 assert_eq!(report.spec.iterations, 3);
1527 }
1528
1529 #[test]
1530 fn rejects_zero_iterations() {
1531 let result = BenchSpec::new("test", 0, 10);
1532 assert!(matches!(
1533 result,
1534 Err(TimingError::NoIterations { count: 0 })
1535 ));
1536 }
1537
1538 #[test]
1539 fn allows_zero_warmup() {
1540 let spec = BenchSpec::new("test", 5, 0).unwrap();
1541 assert_eq!(spec.warmup, 0);
1542
1543 let report = run_closure(spec, || Ok(())).unwrap();
1544 assert_eq!(report.samples.len(), 5);
1545 }
1546
1547 #[test]
1548 fn serializes_to_json() {
1549 let report = BenchReport {
1550 spec: BenchSpec::new("test", 10, 2).unwrap(),
1551 samples: vec![BenchSample {
1552 duration_ns: 1_000_000,
1553 cpu_time_ms: Some(42),
1554 peak_memory_kb: Some(512),
1555 }],
1556 phases: vec![SemanticPhase {
1557 name: "prove".to_string(),
1558 duration_ns: 1_000_000,
1559 }],
1560 timeline: vec![HarnessTimelineSpan {
1561 phase: "measured-benchmark".to_string(),
1562 start_offset_ns: 0,
1563 end_offset_ns: 1_000_000,
1564 iteration: Some(0),
1565 }],
1566 };
1567
1568 let json = serde_json::to_string(&report).unwrap();
1569 let restored: BenchReport = serde_json::from_str(&json).unwrap();
1570
1571 assert_eq!(restored.spec.name, "test");
1572 assert_eq!(restored.samples.len(), 1);
1573 assert_eq!(restored.samples[0].cpu_time_ms, Some(42));
1574 assert_eq!(restored.samples[0].peak_memory_kb, Some(512));
1575 assert_eq!(restored.phases.len(), 1);
1576 assert_eq!(restored.phases[0].name, "prove");
1577 assert!(restored.phases[0].duration_ns > 0);
1578 }
1579
1580 #[test]
1581 fn profile_phase_records_only_measured_iterations() {
1582 let spec = BenchSpec::new("semantic", 2, 1).unwrap();
1583 let mut call_index = 0u32;
1584 let report = run_closure(spec, || {
1585 let phase_name = if call_index == 0 {
1586 "warmup-only"
1587 } else {
1588 "prove"
1589 };
1590 call_index += 1;
1591 profile_phase(phase_name, || std::thread::sleep(Duration::from_millis(1)));
1592 Ok(())
1593 })
1594 .unwrap();
1595
1596 assert!(
1597 !report
1598 .phases
1599 .iter()
1600 .any(|phase| phase.name == "warmup-only"),
1601 "warmup phases should not be recorded"
1602 );
1603 let prove = report
1604 .phases
1605 .iter()
1606 .find(|phase| phase.name == "prove")
1607 .expect("prove phase");
1608 assert!(prove.duration_ns > 0);
1609 }
1610
1611 #[test]
1612 fn profile_phase_keeps_the_v1_model_flat() {
1613 let spec = BenchSpec::new("semantic-flat", 1, 0).unwrap();
1614 let report = run_closure(spec, || {
1615 profile_phase("prove", || {
1616 std::thread::sleep(Duration::from_millis(1));
1617 profile_phase("inner", || std::thread::sleep(Duration::from_millis(1)));
1618 });
1619 Ok(())
1620 })
1621 .unwrap();
1622
1623 assert!(report.phases.iter().any(|phase| phase.name == "prove"));
1624 assert!(
1625 !report.phases.iter().any(|phase| phase.name == "inner"),
1626 "nested phases should not create a second flat phase entry"
1627 );
1628 }
1629
1630 #[test]
1631 fn measured_cpu_excludes_warmup_iterations() {
1632 let spec = BenchSpec::new("cpu", 2, 1).unwrap();
1633 let mut monitor = FakeResourceMonitor::new(vec![
1634 IterationResourceUsage {
1635 cpu_time_ms: Some(11),
1636 peak_memory_kb: Some(32),
1637 },
1638 IterationResourceUsage {
1639 cpu_time_ms: Some(17),
1640 peak_memory_kb: Some(64),
1641 },
1642 ]);
1643 let mut calls = 0_u32;
1644
1645 let report = run_closure_with_monitor(spec, &mut monitor, || {
1646 calls += 1;
1647 Ok(())
1648 })
1649 .unwrap();
1650
1651 assert_eq!(calls, 3);
1652 assert_eq!(monitor.started, 2);
1653 assert_eq!(monitor.finished, 2);
1654 assert_eq!(
1655 report
1656 .samples
1657 .iter()
1658 .map(|sample| sample.cpu_time_ms)
1659 .collect::<Vec<_>>(),
1660 vec![Some(11), Some(17)]
1661 );
1662 assert_eq!(report.cpu_total_ms(), Some(28));
1663 }
1664
1665 #[test]
1666 fn measured_cpu_excludes_outer_harness_and_report_overhead() {
1667 let spec = BenchSpec::new("cpu-harness", 2, 1).unwrap();
1668 let mut monitor = FakeResourceMonitor::new(vec![
1669 IterationResourceUsage {
1670 cpu_time_ms: Some(5),
1671 peak_memory_kb: Some(12),
1672 },
1673 IterationResourceUsage {
1674 cpu_time_ms: Some(7),
1675 peak_memory_kb: Some(18),
1676 },
1677 ]);
1678
1679 let mut setup_calls = 0_u32;
1680 let mut teardown_calls = 0_u32;
1681 let report = run_closure_with_setup_teardown_with_monitor(
1682 spec,
1683 &mut monitor,
1684 || {
1685 setup_calls += 1;
1686 vec![1_u8, 2, 3]
1687 },
1688 |_fixture| Ok(()),
1689 |_fixture| {
1690 teardown_calls += 1;
1691 },
1692 )
1693 .unwrap();
1694
1695 let _serialized = serde_json::to_string(&report).unwrap();
1696
1697 assert_eq!(setup_calls, 1);
1698 assert_eq!(teardown_calls, 1);
1699 assert_eq!(monitor.started, 2);
1700 assert_eq!(report.cpu_total_ms(), Some(12));
1701 assert_eq!(report.cpu_median_ms(), Some(6));
1702 }
1703
1704 #[test]
1705 fn single_iteration_cpu_median_matches_the_measured_iteration() {
1706 let spec = BenchSpec::new("single", 1, 0).unwrap();
1707 let mut monitor = FakeResourceMonitor::new(vec![IterationResourceUsage {
1708 cpu_time_ms: Some(42),
1709 peak_memory_kb: Some(24),
1710 }]);
1711
1712 let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();
1713
1714 assert_eq!(report.samples[0].cpu_time_ms, Some(42));
1715 assert_eq!(report.cpu_total_ms(), Some(42));
1716 assert_eq!(report.cpu_median_ms(), Some(42));
1717 }
1718
1719 #[test]
1720 fn multiple_iterations_export_the_median_cpu_sample() {
1721 let spec = BenchSpec::new("median", 3, 0).unwrap();
1722 let mut monitor = FakeResourceMonitor::new(vec![
1723 IterationResourceUsage {
1724 cpu_time_ms: Some(19),
1725 peak_memory_kb: Some(10),
1726 },
1727 IterationResourceUsage {
1728 cpu_time_ms: Some(7),
1729 peak_memory_kb: Some(30),
1730 },
1731 IterationResourceUsage {
1732 cpu_time_ms: Some(11),
1733 peak_memory_kb: Some(20),
1734 },
1735 ]);
1736
1737 let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();
1738
1739 assert_eq!(report.cpu_median_ms(), Some(11));
1740 assert_eq!(report.cpu_total_ms(), Some(37));
1741 }
1742
1743 #[test]
1744 fn peak_memory_excludes_harness_baseline_overhead() {
1745 let spec = BenchSpec::new("memory", 2, 1).unwrap();
1746 let mut monitor = FakeResourceMonitor::new(vec![
1747 IterationResourceUsage {
1748 cpu_time_ms: Some(3),
1749 peak_memory_kb: Some(48),
1750 },
1751 IterationResourceUsage {
1752 cpu_time_ms: Some(4),
1753 peak_memory_kb: Some(96),
1754 },
1755 ]);
1756
1757 let report = run_closure_with_setup_teardown_with_monitor(
1758 spec,
1759 &mut monitor,
1760 || vec![0_u8; 1024],
1761 |_fixture| Ok(()),
1762 |_fixture| {},
1763 )
1764 .unwrap();
1765
1766 assert_eq!(
1767 report
1768 .samples
1769 .iter()
1770 .map(|sample| sample.peak_memory_kb)
1771 .collect::<Vec<_>>(),
1772 vec![Some(48), Some(96)]
1773 );
1774 assert_eq!(report.peak_memory_kb(), Some(96));
1775 }
1776
1777 #[test]
1778 fn memory_peak_sampler_uses_the_first_post_startup_sample_as_its_baseline() {
1779 use std::collections::VecDeque;
1780 use std::sync::{Arc, Mutex};
1781
1782 let samples = Arc::new(Mutex::new(VecDeque::from([
1783 Some(80_u64),
1784 Some(100_u64),
1785 Some(140_u64),
1786 Some(120_u64),
1787 ])));
1788 let reader_samples = Arc::clone(&samples);
1789 let reader = Arc::new(move || {
1790 reader_samples
1791 .lock()
1792 .expect("sample queue")
1793 .pop_front()
1794 .unwrap_or(Some(120))
1795 });
1796
1797 let sampler = MemoryPeakSampler::start_with_reader(reader).expect("sampler");
1798 let peak_kb = sampler.stop().expect("peak memory");
1799
1800 assert_eq!(peak_kb, 40);
1801 }
1802
1803 #[test]
1804 fn run_with_setup_calls_setup_once() {
1805 use std::sync::atomic::{AtomicU32, Ordering};
1806
1807 static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
1808 static RUN_COUNT: AtomicU32 = AtomicU32::new(0);
1809
1810 let spec = BenchSpec::new("test", 5, 2).unwrap();
1811 let report = run_closure_with_setup(
1812 spec,
1813 || {
1814 SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1815 vec![1, 2, 3]
1816 },
1817 |data| {
1818 RUN_COUNT.fetch_add(1, Ordering::SeqCst);
1819 std::hint::black_box(data.len());
1820 Ok(())
1821 },
1822 )
1823 .unwrap();
1824
1825 assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1); assert_eq!(RUN_COUNT.load(Ordering::SeqCst), 7); assert_eq!(report.samples.len(), 5);
1828 }
1829
1830 #[test]
1831 fn run_with_setup_per_iter_calls_setup_each_time() {
1832 use std::sync::atomic::{AtomicU32, Ordering};
1833
1834 static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
1835
1836 let spec = BenchSpec::new("test", 3, 1).unwrap();
1837 let report = run_closure_with_setup_per_iter(
1838 spec,
1839 || {
1840 SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1841 vec![1, 2, 3]
1842 },
1843 |data| {
1844 std::hint::black_box(data);
1845 Ok(())
1846 },
1847 )
1848 .unwrap();
1849
1850 assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 4); assert_eq!(report.samples.len(), 3);
1852 }
1853
1854 #[test]
1855 fn run_with_setup_teardown_calls_both() {
1856 use std::sync::atomic::{AtomicU32, Ordering};
1857
1858 static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
1859 static TEARDOWN_COUNT: AtomicU32 = AtomicU32::new(0);
1860
1861 let spec = BenchSpec::new("test", 3, 1).unwrap();
1862 let report = run_closure_with_setup_teardown(
1863 spec,
1864 || {
1865 SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1866 "resource"
1867 },
1868 |_resource| Ok(()),
1869 |_resource| {
1870 TEARDOWN_COUNT.fetch_add(1, Ordering::SeqCst);
1871 },
1872 )
1873 .unwrap();
1874
1875 assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
1876 assert_eq!(TEARDOWN_COUNT.load(Ordering::SeqCst), 1);
1877 assert_eq!(report.samples.len(), 3);
1878 }
1879
1880 #[test]
1881 fn bench_report_serializes_exact_harness_timeline() {
1882 let spec = BenchSpec::new("timeline", 2, 1).unwrap();
1883 let report = run_closure_with_setup_teardown(
1884 spec,
1885 || {
1886 std::thread::sleep(Duration::from_millis(1));
1887 "resource"
1888 },
1889 |_resource| {
1890 std::thread::sleep(Duration::from_millis(1));
1891 Ok(())
1892 },
1893 |_resource| {
1894 std::thread::sleep(Duration::from_millis(1));
1895 },
1896 )
1897 .unwrap();
1898
1899 let json = serde_json::to_value(&report).unwrap();
1900 assert_eq!(json["timeline"][0]["phase"], "setup");
1901 assert_eq!(json["timeline"][1]["phase"], "warmup-benchmark");
1902 assert_eq!(json["timeline"][2]["phase"], "measured-benchmark");
1903 assert_eq!(json["timeline"][3]["phase"], "measured-benchmark");
1904 assert_eq!(json["timeline"][4]["phase"], "teardown");
1905 }
1906}