Skip to main content

mobench_sdk/
timing.rs

1//! Lightweight benchmarking harness for mobile platforms.
2//!
3//! This module provides the core timing infrastructure for the mobench ecosystem.
4//! It was previously a separate crate (`mobench-runner`) but has been consolidated
5//! into `mobench-sdk` for a simpler dependency graph.
6//!
7//! The module is designed to be minimal and portable, with no platform-specific
8//! dependencies, making it suitable for compilation to Android and iOS targets.
9//!
10//! ## Overview
11//!
12//! The timing module executes benchmark functions with:
13//! - Configurable warmup iterations
14//! - Precise nanosecond-resolution timing
15//! - Simple, serializable results
16//!
17//! ## Usage
18//!
19//! Most users should use this via the higher-level [`crate::run_benchmark`] function
20//! or [`crate::BenchmarkBuilder`]. Direct usage is for custom integrations:
21//!
22//! ```
23//! use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
24//!
25//! // Define a benchmark specification
26//! let spec = BenchSpec::new("my_benchmark", 100, 10)?;
27//!
28//! // Run the benchmark
29//! let report = run_closure(spec, || {
30//!     // Your benchmark code
31//!     let sum: u64 = (0..1000).sum();
32//!     std::hint::black_box(sum);
33//!     Ok(())
34//! })?;
35//!
36//! // Analyze results
37//! let mean_ns = report.samples.iter()
38//!     .map(|s| s.duration_ns)
39//!     .sum::<u64>() / report.samples.len() as u64;
40//!
41//! println!("Mean: {} ns", mean_ns);
42//! # Ok::<(), TimingError>(())
43//! ```
44//!
45//! ## Types
46//!
47//! | Type | Description |
48//! |------|-------------|
49//! | [`BenchSpec`] | Benchmark configuration (name, iterations, warmup) |
50//! | [`BenchSample`] | Single timing measurement in nanoseconds |
51//! | [`BenchReport`] | Complete results with all samples |
52//! | [`TimingError`] | Error conditions during benchmarking |
53//!
54//! ## Feature Flags
55//!
56//! This module is always available. When using `mobench-sdk` with default features,
57//! you also get build automation and template generation. For minimal binary size
58//! (e.g., on mobile targets), use the `runner-only` feature:
59//!
60//! ```toml
61//! [dependencies]
62//! mobench-sdk = { version = "0.1.37", default-features = false, features = ["runner-only"] }
63//! ```
64
65use serde::{Deserialize, Serialize};
66use std::cell::RefCell;
67use std::sync::{Arc, mpsc};
68use std::thread::{self, JoinHandle};
69use std::time::{Duration, Instant};
70use thiserror::Error;
71
/// Benchmark specification defining what and how to benchmark.
///
/// Contains the benchmark name, number of measurement iterations, and
/// warmup iterations to perform before measuring.
///
/// # Example
///
/// ```
/// use mobench_sdk::timing::BenchSpec;
///
/// // Create a spec for 100 iterations with 10 warmup runs
/// let spec = BenchSpec::new("sorting_benchmark", 100, 10)?;
///
/// assert_eq!(spec.name, "sorting_benchmark");
/// assert_eq!(spec.iterations, 100);
/// assert_eq!(spec.warmup, 10);
/// # Ok::<(), mobench_sdk::timing::TimingError>(())
/// ```
///
/// # Serialization
///
/// `BenchSpec` implements `Serialize` and `Deserialize` for JSON persistence:
///
/// ```
/// use mobench_sdk::timing::BenchSpec;
///
/// let spec = BenchSpec {
///     name: "my_bench".to_string(),
///     iterations: 50,
///     warmup: 5,
/// };
///
/// let json = serde_json::to_string(&spec)?;
/// let restored: BenchSpec = serde_json::from_str(&json)?;
///
/// assert_eq!(spec.name, restored.name);
/// # Ok::<(), serde_json::Error>(())
/// ```
///
/// Note: literal construction (as above) bypasses the non-zero `iterations`
/// validation that [`BenchSpec::new`] performs.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSpec {
    /// Name of the benchmark, typically the fully-qualified function name.
    ///
    /// Examples: `"my_crate::fibonacci"`, `"sorting_benchmark"`
    pub name: String,

    /// Number of iterations to measure.
    ///
    /// Each iteration produces one [`BenchSample`]. Must be greater than zero.
    pub iterations: u32,

    /// Number of warmup iterations before measurement.
    ///
    /// Warmup iterations are not recorded. They allow CPU caches to warm
    /// and any JIT compilation to complete. Can be zero.
    pub warmup: u32,
}
128
129impl BenchSpec {
130    /// Creates a new benchmark specification.
131    ///
132    /// # Arguments
133    ///
134    /// * `name` - Name identifier for the benchmark
135    /// * `iterations` - Number of measured iterations (must be > 0)
136    /// * `warmup` - Number of warmup iterations (can be 0)
137    ///
138    /// # Errors
139    ///
140    /// Returns [`TimingError::NoIterations`] if `iterations` is zero.
141    ///
142    /// # Example
143    ///
144    /// ```
145    /// use mobench_sdk::timing::BenchSpec;
146    ///
147    /// let spec = BenchSpec::new("test", 100, 10)?;
148    /// assert_eq!(spec.iterations, 100);
149    ///
150    /// // Zero iterations is an error
151    /// let err = BenchSpec::new("test", 0, 10);
152    /// assert!(err.is_err());
153    /// # Ok::<(), mobench_sdk::timing::TimingError>(())
154    /// ```
155    pub fn new(name: impl Into<String>, iterations: u32, warmup: u32) -> Result<Self, TimingError> {
156        if iterations == 0 {
157            return Err(TimingError::NoIterations { count: iterations });
158        }
159
160        Ok(Self {
161            name: name.into(),
162            iterations,
163            warmup,
164        })
165    }
166}
167
/// A single timing sample from a benchmark iteration.
///
/// Holds the elapsed wall time in nanoseconds plus optional per-iteration
/// resource metrics (CPU time, peak memory growth, and process peak memory).
/// The optional fields are only populated on platforms where the harness can
/// capture them and are skipped from the JSON output when absent.
///
/// The derived `Default` yields a zero-duration sample with every optional
/// metric absent, which is convenient for struct-update construction.
///
/// # Example
///
/// ```
/// use mobench_sdk::timing::BenchSample;
///
/// let sample = BenchSample {
///     duration_ns: 1_500_000,
///     ..Default::default()
/// };
///
/// // Convert to milliseconds
/// let ms = sample.duration_ns as f64 / 1_000_000.0;
/// assert_eq!(ms, 1.5);
/// ```
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct BenchSample {
    /// Duration of the iteration in nanoseconds.
    ///
    /// Measured using [`std::time::Instant`] for monotonic, high-resolution timing.
    pub duration_ns: u64,

    /// CPU time consumed by the measured iteration in milliseconds.
    ///
    /// This is captured around the measured benchmark closure only and excludes
    /// warmup, setup, teardown, and report generation overhead.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_time_ms: Option<u64>,

    /// Peak memory growth during the measured iteration in kilobytes.
    ///
    /// This legacy wire field is baseline-adjusted immediately before the
    /// measured closure enters. It reports growth during the measured
    /// iteration, not absolute process or device peak memory.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub peak_memory_kb: Option<u64>,

    /// Peak resident memory of the benchmark process during the measured iteration.
    ///
    /// This is sampled from the current process while the measured closure is
    /// running. Unlike `peak_memory_kb`, it is not baseline-adjusted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub process_peak_memory_kb: Option<u64>,
}
218
219impl BenchSample {
220    fn from_measurement(duration: Duration, resources: IterationResourceUsage) -> Self {
221        Self {
222            duration_ns: duration.as_nanos() as u64,
223            cpu_time_ms: resources.cpu_time_ms,
224            peak_memory_kb: resources.peak_memory_kb,
225            process_peak_memory_kb: resources.process_peak_memory_kb,
226        }
227    }
228}
229
/// Complete benchmark report with all timing samples.
///
/// Contains the original specification and all collected samples.
/// Can be serialized to JSON for storage or transmission.
/// Use [`BenchReport::summary`] for precomputed aggregate statistics.
///
/// # Example
///
/// ```
/// use mobench_sdk::timing::{BenchSpec, run_closure};
///
/// let spec = BenchSpec::new("example", 50, 5)?;
/// let report = run_closure(spec, || {
///     std::hint::black_box(42);
///     Ok(())
/// })?;
///
/// // Calculate statistics
/// let samples: Vec<u64> = report.samples.iter()
///     .map(|s| s.duration_ns)
///     .collect();
///
/// let min = samples.iter().min().unwrap();
/// let max = samples.iter().max().unwrap();
/// let mean = samples.iter().sum::<u64>() / samples.len() as u64;
///
/// println!("Min: {} ns, Max: {} ns, Mean: {} ns", min, max, mean);
/// # Ok::<(), mobench_sdk::timing::TimingError>(())
/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchReport {
    /// The specification used for this benchmark run.
    pub spec: BenchSpec,

    /// All collected timing samples.
    ///
    /// The length equals `spec.iterations`. Samples are in execution order.
    pub samples: Vec<BenchSample>,

    /// Optional semantic phase timings captured during measured iterations.
    ///
    /// Defaults to an empty vector when deserializing reports produced by
    /// older mobench versions that did not emit phase data. Omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub phases: Vec<SemanticPhase>,

    /// Exact harness timeline spans in execution order.
    ///
    /// Defaults to an empty vector when deserializing reports produced by
    /// older mobench versions that did not emit timeline data. Omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub timeline: Vec<HarnessTimelineSpan>,
}
282
/// One span in the harness execution timeline: a named phase with its start
/// and end expressed as nanosecond offsets from the run's timeline origin.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct HarnessTimelineSpan {
    /// Phase label for this span.
    pub phase: String,
    /// Span start, in nanoseconds since the harness timeline origin.
    pub start_offset_ns: u64,
    /// Span end, in nanoseconds since the harness timeline origin.
    pub end_offset_ns: u64,
    /// Iteration index the span belongs to, when the phase is per-iteration.
    pub iteration: Option<u32>,
}
290
291impl BenchReport {
292    /// Returns the mean (average) duration in nanoseconds.
293    #[must_use]
294    pub fn mean_ns(&self) -> f64 {
295        if self.samples.is_empty() {
296            return 0.0;
297        }
298        let sum: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
299        sum as f64 / self.samples.len() as f64
300    }
301
302    /// Returns the median duration in nanoseconds.
303    #[must_use]
304    pub fn median_ns(&self) -> f64 {
305        if self.samples.is_empty() {
306            return 0.0;
307        }
308        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
309        sorted.sort_unstable();
310        let len = sorted.len();
311        if len % 2 == 0 {
312            (sorted[len / 2 - 1] + sorted[len / 2]) as f64 / 2.0
313        } else {
314            sorted[len / 2] as f64
315        }
316    }
317
318    /// Returns the standard deviation in nanoseconds (sample std dev, n-1).
319    #[must_use]
320    pub fn std_dev_ns(&self) -> f64 {
321        if self.samples.len() < 2 {
322            return 0.0;
323        }
324        let mean = self.mean_ns();
325        let variance: f64 = self
326            .samples
327            .iter()
328            .map(|s| {
329                let diff = s.duration_ns as f64 - mean;
330                diff * diff
331            })
332            .sum::<f64>()
333            / (self.samples.len() - 1) as f64;
334        variance.sqrt()
335    }
336
337    /// Returns the given percentile (0-100) in nanoseconds.
338    #[must_use]
339    pub fn percentile_ns(&self, p: f64) -> f64 {
340        if self.samples.is_empty() {
341            return 0.0;
342        }
343        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
344        sorted.sort_unstable();
345        let p = p.clamp(0.0, 100.0) / 100.0;
346        let index = (p * (sorted.len() - 1) as f64).round() as usize;
347        sorted[index.min(sorted.len() - 1)] as f64
348    }
349
350    /// Returns the minimum duration in nanoseconds.
351    #[must_use]
352    pub fn min_ns(&self) -> u64 {
353        self.samples
354            .iter()
355            .map(|s| s.duration_ns)
356            .min()
357            .unwrap_or(0)
358    }
359
360    /// Returns the maximum duration in nanoseconds.
361    #[must_use]
362    pub fn max_ns(&self) -> u64 {
363        self.samples
364            .iter()
365            .map(|s| s.duration_ns)
366            .max()
367            .unwrap_or(0)
368    }
369
370    /// Returns the total measured CPU time in milliseconds across all iterations.
371    #[must_use]
372    pub fn cpu_total_ms(&self) -> Option<u64> {
373        let values = self
374            .samples
375            .iter()
376            .filter_map(|sample| sample.cpu_time_ms)
377            .collect::<Vec<_>>();
378        if values.is_empty() {
379            return None;
380        }
381
382        let total = values
383            .iter()
384            .fold(0_u128, |sum, value| sum.saturating_add(u128::from(*value)));
385        Some(total.min(u128::from(u64::MAX)) as u64)
386    }
387
388    /// Returns the median measured CPU time in milliseconds across all iterations.
389    #[must_use]
390    pub fn cpu_median_ms(&self) -> Option<u64> {
391        let mut values = self
392            .samples
393            .iter()
394            .filter_map(|sample| sample.cpu_time_ms)
395            .collect::<Vec<_>>();
396        if values.is_empty() {
397            return None;
398        }
399
400        values.sort_unstable();
401        let len = values.len();
402        Some(if len % 2 == 0 {
403            let lower = u128::from(values[(len / 2) - 1]);
404            let upper = u128::from(values[len / 2]);
405            ((lower + upper) / 2) as u64
406        } else {
407            values[len / 2]
408        })
409    }
410
411    /// Returns the maximum baseline-adjusted peak memory growth in kilobytes.
412    ///
413    /// This is the legacy accessor for the serialized `peak_memory_kb` sample
414    /// field. It does not report absolute process or device peak memory.
415    #[must_use]
416    pub fn peak_memory_kb(&self) -> Option<u64> {
417        self.samples
418            .iter()
419            .filter_map(|sample| sample.peak_memory_kb)
420            .max()
421    }
422
423    /// Returns the maximum baseline-adjusted peak memory growth in kilobytes.
424    ///
425    /// This is an explicit alias for [`BenchReport::peak_memory_kb`] to make the
426    /// growth semantics clear while preserving the legacy wire field.
427    #[must_use]
428    #[doc(alias = "peak_memory_kb")]
429    pub fn peak_memory_growth_kb(&self) -> Option<u64> {
430        self.peak_memory_kb()
431    }
432
433    /// Returns the maximum process resident memory peak in kilobytes.
434    ///
435    /// This reports the current benchmark process peak sampled during measured
436    /// iterations. It excludes BrowserStack/session-level provider memory.
437    #[must_use]
438    pub fn process_peak_memory_kb(&self) -> Option<u64> {
439        self.samples
440            .iter()
441            .filter_map(|sample| sample.process_peak_memory_kb)
442            .max()
443    }
444
445    /// Returns a statistical summary of the benchmark results.
446    #[must_use]
447    pub fn summary(&self) -> BenchSummary {
448        BenchSummary {
449            name: self.spec.name.clone(),
450            iterations: self.samples.len() as u32,
451            warmup: self.spec.warmup,
452            mean_ns: self.mean_ns(),
453            median_ns: self.median_ns(),
454            std_dev_ns: self.std_dev_ns(),
455            min_ns: self.min_ns(),
456            max_ns: self.max_ns(),
457            p95_ns: self.percentile_ns(95.0),
458            p99_ns: self.percentile_ns(99.0),
459        }
460    }
461}
462
/// Per-iteration resource usage handed back by a [`ResourceMonitor`].
///
/// Every field is optional: a metric stays `None` when the platform or the
/// sampler cannot capture it for that iteration.
#[derive(Clone, Debug, Default)]
struct IterationResourceUsage {
    /// CPU time consumed during the iteration, in milliseconds.
    cpu_time_ms: Option<u64>,
    /// Baseline-adjusted peak memory growth during the iteration, in KB.
    peak_memory_kb: Option<u64>,
    /// Absolute process peak resident memory during the iteration, in KB.
    process_peak_memory_kb: Option<u64>,
}
469
/// Converts `instant` into a nanosecond offset from `origin`, saturating at
/// `u64::MAX` for offsets that do not fit in 64 bits.
fn instant_offset_ns(origin: Instant, instant: Instant) -> u64 {
    let elapsed = instant.duration_since(origin).as_nanos();
    u64::try_from(elapsed).unwrap_or(u64::MAX)
}
476
477fn push_timeline_span(
478    timeline: &mut Vec<HarnessTimelineSpan>,
479    origin: Instant,
480    phase: &str,
481    started_at: Instant,
482    ended_at: Instant,
483    iteration: Option<u32>,
484) {
485    timeline.push(HarnessTimelineSpan {
486        phase: phase.to_string(),
487        start_offset_ns: instant_offset_ns(origin, started_at),
488        end_offset_ns: instant_offset_ns(origin, ended_at),
489        iteration,
490    });
491}
492
/// Statistical summary of benchmark results.
///
/// Produced by [`BenchReport::summary`]; all duration statistics are derived
/// from the report's `duration_ns` samples.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSummary {
    /// Name of the benchmark.
    pub name: String,
    /// Number of measured iterations (the sample count).
    pub iterations: u32,
    /// Number of warmup iterations.
    pub warmup: u32,
    /// Mean duration in nanoseconds.
    pub mean_ns: f64,
    /// Median duration in nanoseconds.
    pub median_ns: f64,
    /// Standard deviation in nanoseconds.
    pub std_dev_ns: f64,
    /// Minimum duration in nanoseconds.
    pub min_ns: u64,
    /// Maximum duration in nanoseconds.
    pub max_ns: u64,
    /// 95th percentile in nanoseconds.
    pub p95_ns: f64,
    /// 99th percentile in nanoseconds.
    pub p99_ns: f64,
}
517
/// Flat semantic phase timing captured during a benchmark run.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticPhase {
    /// Phase name as reported by the benchmark code.
    pub name: String,
    /// Total time attributed to this phase in nanoseconds; top-level
    /// occurrences with the same name are summed together.
    pub duration_ns: u64,
}

/// Mutable accumulator state behind the thread-local phase collector.
#[derive(Default)]
struct SemanticPhaseCollector {
    /// True only while a measurement window is open (between
    /// `begin_measurement` and `finish`).
    enabled: bool,
    /// Current phase nesting depth; only depth-0 (top-level) phases are
    /// recorded by `exit_phase`.
    depth: usize,
    /// Accumulated phase timings, deduplicated by name.
    phases: Vec<SemanticPhase>,
}
531
532impl SemanticPhaseCollector {
533    fn reset(&mut self) {
534        self.enabled = false;
535        self.depth = 0;
536        self.phases.clear();
537    }
538
539    fn begin_measurement(&mut self) {
540        self.reset();
541        self.enabled = true;
542    }
543
544    fn finish(&mut self) -> Vec<SemanticPhase> {
545        self.enabled = false;
546        self.depth = 0;
547        std::mem::take(&mut self.phases)
548    }
549
550    fn enter_phase(&mut self) -> Option<bool> {
551        if !self.enabled {
552            return None;
553        }
554        let top_level = self.depth == 0;
555        self.depth += 1;
556        Some(top_level)
557    }
558
559    fn exit_phase(&mut self, name: &str, top_level: bool, elapsed: Duration) {
560        self.depth = self.depth.saturating_sub(1);
561        if !self.enabled || !top_level {
562            return;
563        }
564
565        let duration_ns = elapsed.as_nanos().min(u128::from(u64::MAX)) as u64;
566        if let Some(phase) = self.phases.iter_mut().find(|phase| phase.name == name) {
567            phase.duration_ns = phase.duration_ns.saturating_add(duration_ns);
568        } else {
569            self.phases.push(SemanticPhase {
570                name: name.to_string(),
571                duration_ns,
572            });
573        }
574    }
575}
576
thread_local! {
    /// Per-thread semantic-phase accumulator; thread-local storage keeps the
    /// bookkeeping of concurrently running benchmark threads independent.
    static SEMANTIC_PHASE_COLLECTOR: RefCell<SemanticPhaseCollector> =
        RefCell::new(SemanticPhaseCollector::default());
}

/// Guard that reports a semantic phase to the collector when dropped.
struct SemanticPhaseGuard {
    /// Phase name handed to `exit_phase` on drop.
    name: String,
    /// Phase start time; `None` makes the drop a no-op (recording disabled).
    started_at: Option<Instant>,
    /// Whether this guard opened a top-level (depth 0) phase.
    top_level: bool,
}
587
588impl Drop for SemanticPhaseGuard {
589    fn drop(&mut self) {
590        let Some(started_at) = self.started_at else {
591            return;
592        };
593
594        let elapsed = started_at.elapsed();
595        SEMANTIC_PHASE_COLLECTOR.with(|collector| {
596            collector
597                .borrow_mut()
598                .exit_phase(&self.name, self.top_level, elapsed);
599        });
600    }
601}
602
/// Clears any semantic-phase state on the current thread and leaves
/// collection disabled.
fn reset_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().reset());
}

/// Clears stale state and enables semantic-phase collection on the current
/// thread.
fn begin_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().begin_measurement());
}

/// Disables collection and returns the phases accumulated on the current
/// thread since `begin_semantic_phase_collection`.
fn finish_semantic_phase_collection() -> Vec<SemanticPhase> {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().finish())
}
614
/// Start/finish hook pair for capturing per-iteration resource usage.
trait ResourceMonitor {
    /// Opaque state carried from `start` to the matching `finish` call.
    type Token;

    /// Begins monitoring one iteration and returns the token that the
    /// matching `finish` call consumes.
    fn start(&mut self) -> Self::Token;

    /// Ends monitoring for the iteration identified by `token` and returns
    /// the usage observed in between.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage;
}

#[derive(Default)]
struct DefaultResourceMonitor {
    /// Lazily-initialized long-lived sampler shared across measured iterations.
    ///
    /// We pay thread-spawn cost once per benchmark function rather than per
    /// iteration. On constrained mobile devices (Android/Bionic) thread
    /// creation is significantly more expensive than on desktop Linux, and
    /// 1000+ iteration benchmarks would otherwise spawn 1000+ throwaway
    /// threads.
    memory_sampler: Option<PersistentMemorySampler>,
    /// Set after the first attempt to start the sampler so we do not retry
    /// on platforms where the sampler is not supported.
    sampler_init_attempted: bool,
}

/// Snapshot of the process's accumulated CPU time, split into user and
/// system components (both in nanoseconds).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessCpuTimeSnapshot {
    user_ns: u64,
    system_ns: u64,
}

impl ProcessCpuTimeSnapshot {
    /// Builds a snapshot from the `ru_utime`/`ru_stime` timevals of an
    /// `rusage` record; `None` if either timeval has a negative field.
    #[cfg(unix)]
    fn from_rusage_timevals(user: libc::timeval, system: libc::timeval) -> Option<Self> {
        Some(Self {
            user_ns: timeval_to_ns(user)?,
            system_ns: timeval_to_ns(system)?,
        })
    }

    /// Combined user + system CPU time in nanoseconds (saturating add).
    fn total_ns(self) -> u64 {
        self.user_ns.saturating_add(self.system_ns)
    }
}

/// Token returned by [`DefaultResourceMonitor::start`].
struct DefaultResourceToken {
    /// CPU time snapshot taken at iteration start, if available.
    cpu_time_start: Option<ProcessCpuTimeSnapshot>,
    /// True if the persistent sampler accepted a `Begin` for this iteration
    /// and we therefore expect a corresponding result on `finish`.
    has_memory_window: bool,
}
664
impl ResourceMonitor for DefaultResourceMonitor {
    type Token = DefaultResourceToken;

    /// Snapshots CPU time and opens a memory-sampling window.
    ///
    /// The persistent memory sampler is spawned on the first call only; a
    /// failed first attempt is remembered so unsupported platforms do not
    /// retry every iteration.
    fn start(&mut self) -> Self::Token {
        if !self.sampler_init_attempted {
            self.memory_sampler = PersistentMemorySampler::start();
            self.sampler_init_attempted = true;
        }
        // `begin_window` only returns true once the sampler thread has
        // acknowledged a baseline reading for this iteration.
        let has_memory_window = self
            .memory_sampler
            .as_ref()
            .is_some_and(PersistentMemorySampler::begin_window);
        Self::Token {
            cpu_time_start: current_process_cpu_time(),
            has_memory_window,
        }
    }

    /// Closes the iteration: computes the CPU-time delta and collects the
    /// memory peak from the sampler window opened in `start`.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
        let cpu_time_ms = token
            .cpu_time_start
            .zip(current_process_cpu_time())
            .and_then(|(start, end)| process_cpu_delta_ms(start, end));

        // Only ask for a result when `start` actually opened a window;
        // otherwise `end_window` would consume someone else's result.
        let memory_peak = if token.has_memory_window {
            self.memory_sampler
                .as_ref()
                .and_then(PersistentMemorySampler::end_window)
        } else {
            None
        };

        IterationResourceUsage {
            cpu_time_ms,
            // Zero readings are reported as `None` so they serialize as
            // "absent" rather than "measured zero".
            peak_memory_kb: memory_peak
                .and_then(|peak| (peak.growth_kb > 0).then_some(peak.growth_kb)),
            process_peak_memory_kb: memory_peak
                .and_then(|peak| (peak.process_peak_kb > 0).then_some(peak.process_peak_kb)),
        }
    }
}
706
/// Converts nanoseconds to milliseconds, rounding half up.
fn round_ns_to_ms(ns: u64) -> u64 {
    let whole_ms = ns / 1_000_000;
    let remainder_ns = ns % 1_000_000;
    if remainder_ns >= 500_000 {
        whole_ms + 1
    } else {
        whole_ms
    }
}
710
/// Computes the CPU time consumed between two snapshots, in milliseconds
/// (rounded half up).
///
/// Returns `None` when `end` is smaller than `start`, which should not
/// happen for a monotonic accumulated-CPU-time counter but is guarded via
/// `checked_sub` rather than wrapping.
#[cfg(unix)]
fn process_cpu_delta_ms(start: ProcessCpuTimeSnapshot, end: ProcessCpuTimeSnapshot) -> Option<u64> {
    Some(round_ns_to_ms(
        end.total_ns().checked_sub(start.total_ns())?,
    ))
}

/// Non-Unix stub: CPU-time deltas are not captured on these platforms.
#[cfg(not(unix))]
fn process_cpu_delta_ms(
    _start: ProcessCpuTimeSnapshot,
    _end: ProcessCpuTimeSnapshot,
) -> Option<u64> {
    None
}

/// Converts a `libc::timeval` to nanoseconds.
///
/// Returns `None` when either field is negative (invalid for an elapsed-time
/// value); multiplications and the final add saturate instead of wrapping.
#[cfg(unix)]
fn timeval_to_ns(value: libc::timeval) -> Option<u64> {
    let secs = u64::try_from(value.tv_sec).ok()?;
    let micros = u64::try_from(value.tv_usec).ok()?;
    Some(
        secs.saturating_mul(1_000_000_000)
            .saturating_add(micros.saturating_mul(1_000)),
    )
}
735
/// Reads the process's accumulated user + system CPU time via `getrusage`.
///
/// Returns `None` if the syscall fails or the returned timevals are invalid.
#[cfg(unix)]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::uninit();
    // SAFETY: `RUSAGE_SELF` is always a valid `who` value and the kernel
    // writes a fully-initialized `rusage` into the provided pointer on
    // success. We bail out via `rc != 0` before touching the buffer below.
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }

    // SAFETY: `getrusage` returned 0, so the buffer is fully initialized.
    let usage = unsafe { usage.assume_init() };
    ProcessCpuTimeSnapshot::from_rusage_timevals(usage.ru_utime, usage.ru_stime)
}

/// Non-Unix stub: CPU time is not captured on these platforms.
#[cfg(not(unix))]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    None
}

/// Polling interval used by the background memory-sampler thread.
const MEMORY_SAMPLER_INTERVAL: Duration = Duration::from_millis(1);
/// Callback returning the current process resident memory in kilobytes, or
/// `None` when the reading is unavailable.
type MemoryReader = Arc<dyn Fn() -> Option<u64> + Send + Sync + 'static>;
759
/// Peak memory observed during one sampling window.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessMemoryPeak {
    /// Peak minus the baseline captured when the window opened, in KB.
    growth_kb: u64,
    /// Absolute process peak resident memory during the window, in KB.
    process_peak_kb: u64,
}

/// Long-lived memory sampler. Spawned once per benchmark function and reused
/// across every measured iteration via `begin_window` / `end_window`.
///
/// Replaces the previous per-iteration design that spawned and joined a fresh
/// thread for every sample. On Android (Bionic) and iOS that thread-creation
/// overhead is non-trivial and would inflate harness wall time on
/// high-iteration runs.
struct PersistentMemorySampler {
    /// Command channel into the sampler thread (capacity 1).
    cmd_tx: mpsc::SyncSender<SamplerCmd>,
    /// Per-window results emitted by the sampler thread.
    result_rx: mpsc::Receiver<Option<ProcessMemoryPeak>>,
    /// Join handle, taken (and joined) on drop.
    handle: Option<JoinHandle<()>>,
}

/// Commands understood by the sampler thread's loop.
enum SamplerCmd {
    /// Open a sampling window; the thread acks `true` once a baseline
    /// reading succeeds, `false` otherwise.
    Begin(mpsc::SyncSender<bool>),
    /// Close the current window and emit its `ProcessMemoryPeak` result.
    End,
    /// Terminate the sampler thread.
    Shutdown,
}
784
impl PersistentMemorySampler {
    /// Starts a sampler backed by the platform memory reader; `None` when
    /// the sampler thread cannot be spawned or fails to initialize.
    fn start() -> Option<Self> {
        Self::start_with_reader(Arc::new(current_process_memory_kb))
    }

    /// Starts a sampler with an injectable reader (also used for testing).
    ///
    /// Blocks until the spawned thread signals readiness so the sampler's
    /// startup cost never overlaps a measurement window.
    fn start_with_reader(reader: MemoryReader) -> Option<Self> {
        let (cmd_tx, cmd_rx) = mpsc::sync_channel::<SamplerCmd>(1);
        let (result_tx, result_rx) = mpsc::sync_channel::<Option<ProcessMemoryPeak>>(1);
        let (ready_tx, ready_rx) = mpsc::sync_channel::<()>(1);

        let handle = thread::Builder::new()
            .name("mobench-memory-sampler".to_string())
            .spawn(move || {
                // Touch the sampler thread's own stack and runtime state once
                // before any window opens so its initialization cost cannot
                // contaminate the first iteration's baseline measurement.
                let _ = reader();
                if ready_tx.send(()).is_err() {
                    return;
                }
                drop(ready_tx);

                Self::run(reader, &cmd_rx, &result_tx);
            })
            .ok()?;

        if ready_rx.recv().is_err() {
            // Thread failed before sending readiness. Send Shutdown to make
            // sure it does not get stuck on a later cmd recv, then join.
            let _ = cmd_tx.send(SamplerCmd::Shutdown);
            let _ = handle.join();
            return None;
        }

        Some(Self {
            cmd_tx,
            result_rx,
            handle: Some(handle),
        })
    }

    /// Sampler thread main loop: waits for `Begin`, polls the reader at
    /// `MEMORY_SAMPLER_INTERVAL` while a window is open, and emits one
    /// `ProcessMemoryPeak` per window on `End`/`Shutdown`/disconnect.
    fn run(
        reader: MemoryReader,
        cmd_rx: &mpsc::Receiver<SamplerCmd>,
        result_tx: &mpsc::SyncSender<Option<ProcessMemoryPeak>>,
    ) {
        while let Ok(cmd) = cmd_rx.recv() {
            match cmd {
                SamplerCmd::Begin(ack_tx) => {
                    // The baseline read must succeed before the window is
                    // acknowledged; a false ack tells `begin_window` that no
                    // result will be produced for this iteration.
                    let baseline = match reader() {
                        Some(v) => v,
                        None => {
                            let _ = ack_tx.send(false);
                            continue;
                        }
                    };
                    if ack_tx.send(true).is_err() {
                        continue;
                    }
                    let mut peak = baseline;
                    // Poll until the window closes; `recv_timeout` doubles as
                    // the sampling tick.
                    let shutting_down = loop {
                        match cmd_rx.recv_timeout(MEMORY_SAMPLER_INTERVAL) {
                            Ok(SamplerCmd::End) => break false,
                            Ok(SamplerCmd::Shutdown) => break true,
                            // A stray Begin while a window is already open
                            // means the producer side desynced — preserve
                            // existing behavior by ignoring it.
                            Ok(SamplerCmd::Begin(ack_tx)) => {
                                let _ = ack_tx.send(false);
                            }
                            Err(mpsc::RecvTimeoutError::Timeout) => {
                                if let Some(current) = reader()
                                    && current > peak
                                {
                                    peak = current;
                                }
                            }
                            Err(mpsc::RecvTimeoutError::Disconnected) => break true,
                        }
                    };
                    // One last sample after the window closes so a final
                    // allocation that happens between the last poll and the
                    // End command is still accounted for.
                    if let Some(current) = reader()
                        && current > peak
                    {
                        peak = current;
                    }
                    let _ = result_tx.send(Some(ProcessMemoryPeak {
                        growth_kb: peak.saturating_sub(baseline),
                        process_peak_kb: peak,
                    }));
                    if shutting_down {
                        return;
                    }
                }
                SamplerCmd::Shutdown => return,
                // End without an active Begin — ignore.
                SamplerCmd::End => {}
            }
        }
    }

    /// Opens a sampling window; returns `true` only after the sampler thread
    /// has acknowledged a successful baseline reading.
    fn begin_window(&self) -> bool {
        let (ack_tx, ack_rx) = mpsc::sync_channel(1);
        self.cmd_tx
            .send(SamplerCmd::Begin(ack_tx))
            .ok()
            .and_then(|()| ack_rx.recv().ok())
            .unwrap_or(false)
    }

    /// Closes the current window and returns its peak; `None` if the sampler
    /// thread is gone or produced no result.
    fn end_window(&self) -> Option<ProcessMemoryPeak> {
        self.cmd_tx.send(SamplerCmd::End).ok()?;
        self.result_rx.recv().ok().flatten()
    }
}
902
903impl Drop for PersistentMemorySampler {
904    fn drop(&mut self) {
905        let _ = self.cmd_tx.send(SamplerCmd::Shutdown);
906        if let Some(handle) = self.handle.take() {
907            let _ = handle.join();
908        }
909    }
910}
911
/// Returns the current process resident memory in kilobytes.
///
/// Reads the resident page count (second whitespace-separated field) from
/// `/proc/self/statm` and scales it by the system page size. Returns `None`
/// if the file is unreadable, unparsable, or the page size query fails.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn current_process_memory_kb() -> Option<u64> {
    let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
    let resident_pages = statm
        .split_whitespace()
        .nth(1)
        .and_then(|value| value.parse::<u64>().ok())?;
    // SAFETY: `_SC_PAGESIZE` is a valid sysconf selector on every supported
    // POSIX target; sysconf has no side effects and reports failures via -1.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if page_size <= 0 {
        return None;
    }
    let page_size = u64::try_from(page_size).ok()?;
    Some(resident_pages.saturating_mul(page_size) / 1024)
}
928
#[cfg(any(target_os = "ios", target_os = "macos"))]
/// Returns the process resident set size in KiB via `task_info`, or `None`
/// when the kernel query does not return `KERN_SUCCESS`.
fn current_process_memory_kb() -> Option<u64> {
    let mut info = std::mem::MaybeUninit::<libc::mach_task_basic_info_data_t>::uninit();
    let mut count = libc::MACH_TASK_BASIC_INFO_COUNT;
    // `mach_task_self` is marked deprecated by libc in favor of
    // `mach_task_self_`, but the replacement symbol is not yet exposed by the
    // libc crate's iOS/macOS bindings (libc < 0.3). The deprecation is purely
    // cosmetic — the function continues to be the documented way to obtain
    // the current task port and is what Apple's headers expand the macro to.
    #[allow(deprecated)]
    // SAFETY: `mach_task_self` always returns a valid task port for the
    // current process. `MACH_TASK_BASIC_INFO` matches the
    // `mach_task_basic_info_data_t` layout we pass; `count` carries the
    // capacity in 32-bit words and is updated by the kernel on success.
    // We check for `KERN_SUCCESS` before assuming the buffer is initialized.
    let rc = unsafe {
        libc::task_info(
            libc::mach_task_self(),
            libc::MACH_TASK_BASIC_INFO,
            info.as_mut_ptr().cast::<libc::integer_t>(),
            &mut count,
        )
    };
    if rc != libc::KERN_SUCCESS {
        return None;
    }

    // SAFETY: `task_info` returned `KERN_SUCCESS`, so the basic info struct
    // is fully populated.
    let info = unsafe { info.assume_init() };
    Some(info.resident_size / 1024)
}
961
#[cfg(not(any(
    target_os = "android",
    target_os = "linux",
    target_os = "ios",
    target_os = "macos"
)))]
/// Fallback for targets without a supported memory probe: always reports
/// `None` rather than fabricating a reading.
fn current_process_memory_kb() -> Option<u64> {
    None
}
971
972fn measure_iteration<M, F>(
973    monitor: &mut M,
974    f: F,
975) -> Result<(BenchSample, Instant, Instant), TimingError>
976where
977    M: ResourceMonitor,
978    F: FnOnce() -> Result<(), TimingError>,
979{
980    let token = monitor.start();
981    let started_at = Instant::now();
982    let result = f();
983    let ended_at = Instant::now();
984    let resources = monitor.finish(token);
985    result.map(|_| {
986        (
987            BenchSample::from_measurement(ended_at.duration_since(started_at), resources),
988            started_at,
989            ended_at,
990        )
991    })
992}
993
994/// Records a flat semantic phase when called inside an active benchmark measurement loop.
995///
996/// Phases are aggregated across measured iterations and ignored during warmup/setup.
997/// Nested phases are intentionally collapsed in v1 to keep the output flat.
998pub fn profile_phase<T>(name: &str, f: impl FnOnce() -> T) -> T {
999    let guard = SEMANTIC_PHASE_COLLECTOR.with(|collector| {
1000        let mut collector = collector.borrow_mut();
1001        match collector.enter_phase() {
1002            Some(top_level) => SemanticPhaseGuard {
1003                name: name.to_string(),
1004                started_at: Some(Instant::now()),
1005                top_level,
1006            },
1007            None => SemanticPhaseGuard {
1008                name: String::new(),
1009                started_at: None,
1010                top_level: false,
1011            },
1012        }
1013    });
1014
1015    let result = f();
1016    drop(guard);
1017    result
1018}
1019
/// Errors that can occur during benchmark execution.
///
/// # Example
///
/// ```
/// use mobench_sdk::timing::{BenchSpec, TimingError};
///
/// // Zero iterations produces an error
/// let result = BenchSpec::new("test", 0, 10);
/// assert!(matches!(result, Err(TimingError::NoIterations { .. })));
/// ```
#[derive(Debug, Error)]
pub enum TimingError {
    /// The iteration count was zero or invalid.
    ///
    /// At least one iteration is required to produce a measurement.
    /// The error includes the actual value provided for diagnostic purposes.
    #[error("iterations must be greater than zero (got {count}). Minimum recommended: 10")]
    NoIterations {
        /// The invalid iteration count that was provided.
        count: u32,
    },

    /// The benchmark function failed during execution.
    ///
    /// Returned when the benchmarked closure itself reports an error; the
    /// harness stops immediately and no report is produced.
    ///
    /// Contains a description of the failure.
    #[error("benchmark function failed: {0}")]
    Execution(String),
}
1049
1050/// Runs a benchmark by executing a closure repeatedly.
1051///
1052/// This is the core benchmarking function. It:
1053///
1054/// 1. Executes the closure `spec.warmup` times without recording
1055/// 2. Executes the closure `spec.iterations` times, recording each duration
1056/// 3. Returns a [`BenchReport`] with all samples
1057///
1058/// # Arguments
1059///
1060/// * `spec` - Benchmark configuration specifying iterations and warmup
1061/// * `f` - Closure to benchmark; must return `Result<(), TimingError>`
1062///
1063/// # Returns
1064///
1065/// A [`BenchReport`] containing all timing samples, or a [`TimingError`] if
1066/// the benchmark fails.
1067///
1068/// # Example
1069///
1070/// ```
1071/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
1072///
1073/// let spec = BenchSpec::new("sum_benchmark", 100, 10)?;
1074///
1075/// let report = run_closure(spec, || {
1076///     let sum: u64 = (0..1000).sum();
1077///     std::hint::black_box(sum);
1078///     Ok(())
1079/// })?;
1080///
1081/// assert_eq!(report.samples.len(), 100);
1082///
1083/// // Calculate mean duration
1084/// let total_ns: u64 = report.samples.iter().map(|s| s.duration_ns).sum();
1085/// let mean_ns = total_ns / report.samples.len() as u64;
1086/// println!("Mean: {} ns", mean_ns);
1087/// # Ok::<(), TimingError>(())
1088/// ```
1089///
1090/// # Error Handling
1091///
1092/// If the closure returns an error, the benchmark stops immediately:
1093///
1094/// ```
1095/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
1096///
1097/// let spec = BenchSpec::new("failing_bench", 100, 0)?;
1098///
1099/// let result = run_closure(spec, || {
1100///     Err(TimingError::Execution("simulated failure".into()))
1101/// });
1102///
1103/// assert!(result.is_err());
1104/// # Ok::<(), TimingError>(())
1105/// ```
1106///
1107/// # Timing Precision
1108///
1109/// Uses [`std::time::Instant`] for timing, which provides monotonic,
1110/// nanosecond-resolution measurements on most platforms.
1111pub fn run_closure<F>(spec: BenchSpec, f: F) -> Result<BenchReport, TimingError>
1112where
1113    F: FnMut() -> Result<(), TimingError>,
1114{
1115    let mut monitor = DefaultResourceMonitor::default();
1116    run_closure_with_monitor(spec, &mut monitor, f)
1117}
1118
1119fn run_closure_with_monitor<F, M>(
1120    spec: BenchSpec,
1121    monitor: &mut M,
1122    mut f: F,
1123) -> Result<BenchReport, TimingError>
1124where
1125    F: FnMut() -> Result<(), TimingError>,
1126    M: ResourceMonitor,
1127{
1128    if spec.iterations == 0 {
1129        return Err(TimingError::NoIterations {
1130            count: spec.iterations,
1131        });
1132    }
1133
1134    reset_semantic_phase_collection();
1135    let harness_origin = Instant::now();
1136    let mut timeline = Vec::new();
1137
1138    // Warmup phase - not measured
1139    for iteration in 0..spec.warmup {
1140        let phase_start = Instant::now();
1141        f()?;
1142        push_timeline_span(
1143            &mut timeline,
1144            harness_origin,
1145            "warmup-benchmark",
1146            phase_start,
1147            Instant::now(),
1148            Some(iteration),
1149        );
1150    }
1151
1152    // Measurement phase
1153    begin_semantic_phase_collection();
1154    let mut samples = Vec::with_capacity(spec.iterations as usize);
1155    for iteration in 0..spec.iterations {
1156        let (sample, start, end) = match measure_iteration(monitor, &mut f) {
1157            Ok(measurement) => measurement,
1158            Err(err) => {
1159                let _ = finish_semantic_phase_collection();
1160                return Err(err);
1161            }
1162        };
1163        samples.push(sample);
1164        push_timeline_span(
1165            &mut timeline,
1166            harness_origin,
1167            "measured-benchmark",
1168            start,
1169            end,
1170            Some(iteration),
1171        );
1172    }
1173    let phases = finish_semantic_phase_collection();
1174
1175    Ok(BenchReport {
1176        spec,
1177        samples,
1178        phases,
1179        timeline,
1180    })
1181}
1182
1183/// Runs a benchmark with setup that executes once before all iterations.
1184///
1185/// The setup function is called once before timing begins, then the benchmark
1186/// runs multiple times using a reference to the setup result. This is useful
1187/// for expensive initialization that shouldn't be included in timing.
1188///
1189/// # Arguments
1190///
1191/// * `spec` - Benchmark configuration specifying iterations and warmup
1192/// * `setup` - Function that creates the input data (called once, not timed)
1193/// * `f` - Benchmark closure that receives a reference to setup result
1194///
1195/// # Example
1196///
1197/// ```ignore
1198/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup};
1199///
1200/// fn setup_data() -> Vec<u8> {
1201///     vec![0u8; 1_000_000]  // Expensive allocation not measured
1202/// }
1203///
1204/// let spec = BenchSpec::new("hash_benchmark", 100, 10)?;
1205/// let report = run_closure_with_setup(spec, setup_data, |data| {
1206///     std::hint::black_box(compute_hash(data));
1207///     Ok(())
1208/// })?;
1209/// ```
1210pub fn run_closure_with_setup<S, T, F>(
1211    spec: BenchSpec,
1212    setup: S,
1213    mut f: F,
1214) -> Result<BenchReport, TimingError>
1215where
1216    S: FnOnce() -> T,
1217    F: FnMut(&T) -> Result<(), TimingError>,
1218{
1219    let mut monitor = DefaultResourceMonitor::default();
1220    run_closure_with_setup_with_monitor(spec, &mut monitor, setup, move |input| f(input))
1221}
1222
1223fn run_closure_with_setup_with_monitor<S, T, F, M>(
1224    spec: BenchSpec,
1225    monitor: &mut M,
1226    setup: S,
1227    mut f: F,
1228) -> Result<BenchReport, TimingError>
1229where
1230    S: FnOnce() -> T,
1231    F: FnMut(&T) -> Result<(), TimingError>,
1232    M: ResourceMonitor,
1233{
1234    if spec.iterations == 0 {
1235        return Err(TimingError::NoIterations {
1236            count: spec.iterations,
1237        });
1238    }
1239
1240    reset_semantic_phase_collection();
1241    let harness_origin = Instant::now();
1242    let mut timeline = Vec::new();
1243
1244    // Setup phase - not timed
1245    let setup_start = Instant::now();
1246    let input = setup();
1247    push_timeline_span(
1248        &mut timeline,
1249        harness_origin,
1250        "setup",
1251        setup_start,
1252        Instant::now(),
1253        None,
1254    );
1255
1256    // Warmup phase - not recorded
1257    for iteration in 0..spec.warmup {
1258        let phase_start = Instant::now();
1259        f(&input)?;
1260        push_timeline_span(
1261            &mut timeline,
1262            harness_origin,
1263            "warmup-benchmark",
1264            phase_start,
1265            Instant::now(),
1266            Some(iteration),
1267        );
1268    }
1269
1270    // Measurement phase
1271    begin_semantic_phase_collection();
1272    let mut samples = Vec::with_capacity(spec.iterations as usize);
1273    for iteration in 0..spec.iterations {
1274        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
1275            Ok(measurement) => measurement,
1276            Err(err) => {
1277                let _ = finish_semantic_phase_collection();
1278                return Err(err);
1279            }
1280        };
1281        samples.push(sample);
1282        push_timeline_span(
1283            &mut timeline,
1284            harness_origin,
1285            "measured-benchmark",
1286            start,
1287            end,
1288            Some(iteration),
1289        );
1290    }
1291    let phases = finish_semantic_phase_collection();
1292
1293    Ok(BenchReport {
1294        spec,
1295        samples,
1296        phases,
1297        timeline,
1298    })
1299}
1300
1301/// Runs a benchmark with per-iteration setup.
1302///
1303/// Setup runs before each iteration and is not timed. The benchmark takes
1304/// ownership of the setup result, making this suitable for benchmarks that
1305/// mutate their input (e.g., sorting).
1306///
1307/// # Arguments
1308///
1309/// * `spec` - Benchmark configuration specifying iterations and warmup
1310/// * `setup` - Function that creates fresh input for each iteration (not timed)
1311/// * `f` - Benchmark closure that takes ownership of setup result
1312///
1313/// # Example
1314///
1315/// ```ignore
1316/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_per_iter};
1317///
1318/// fn generate_random_vec() -> Vec<i32> {
1319///     (0..1000).map(|_| rand::random()).collect()
1320/// }
1321///
1322/// let spec = BenchSpec::new("sort_benchmark", 100, 10)?;
1323/// let report = run_closure_with_setup_per_iter(spec, generate_random_vec, |mut data| {
1324///     data.sort();
1325///     std::hint::black_box(data);
1326///     Ok(())
1327/// })?;
1328/// ```
1329pub fn run_closure_with_setup_per_iter<S, T, F>(
1330    spec: BenchSpec,
1331    setup: S,
1332    f: F,
1333) -> Result<BenchReport, TimingError>
1334where
1335    S: FnMut() -> T,
1336    F: FnMut(T) -> Result<(), TimingError>,
1337{
1338    let mut monitor = DefaultResourceMonitor::default();
1339    run_closure_with_setup_per_iter_with_monitor(spec, &mut monitor, setup, f)
1340}
1341
1342fn run_closure_with_setup_per_iter_with_monitor<S, T, F, M>(
1343    spec: BenchSpec,
1344    monitor: &mut M,
1345    mut setup: S,
1346    mut f: F,
1347) -> Result<BenchReport, TimingError>
1348where
1349    S: FnMut() -> T,
1350    F: FnMut(T) -> Result<(), TimingError>,
1351    M: ResourceMonitor,
1352{
1353    if spec.iterations == 0 {
1354        return Err(TimingError::NoIterations {
1355            count: spec.iterations,
1356        });
1357    }
1358
1359    reset_semantic_phase_collection();
1360    let harness_origin = Instant::now();
1361    let mut timeline = Vec::new();
1362
1363    // Warmup phase
1364    for iteration in 0..spec.warmup {
1365        let setup_start = Instant::now();
1366        let input = setup();
1367        push_timeline_span(
1368            &mut timeline,
1369            harness_origin,
1370            "fixture-setup",
1371            setup_start,
1372            Instant::now(),
1373            Some(iteration),
1374        );
1375        let phase_start = Instant::now();
1376        f(input)?;
1377        push_timeline_span(
1378            &mut timeline,
1379            harness_origin,
1380            "warmup-benchmark",
1381            phase_start,
1382            Instant::now(),
1383            Some(iteration),
1384        );
1385    }
1386
1387    // Measurement phase
1388    begin_semantic_phase_collection();
1389    let mut samples = Vec::with_capacity(spec.iterations as usize);
1390    for iteration in 0..spec.iterations {
1391        let setup_start = Instant::now();
1392        let input = setup(); // Not timed
1393        push_timeline_span(
1394            &mut timeline,
1395            harness_origin,
1396            "fixture-setup",
1397            setup_start,
1398            Instant::now(),
1399            Some(iteration),
1400        );
1401
1402        let (sample, start, end) = match measure_iteration(monitor, || f(input)) {
1403            Ok(measurement) => measurement,
1404            Err(err) => {
1405                let _ = finish_semantic_phase_collection();
1406                return Err(err);
1407            }
1408        };
1409        samples.push(sample);
1410        push_timeline_span(
1411            &mut timeline,
1412            harness_origin,
1413            "measured-benchmark",
1414            start,
1415            end,
1416            Some(iteration),
1417        );
1418    }
1419    let phases = finish_semantic_phase_collection();
1420
1421    Ok(BenchReport {
1422        spec,
1423        samples,
1424        phases,
1425        timeline,
1426    })
1427}
1428
1429/// Runs a benchmark with setup and teardown.
1430///
1431/// Setup runs once before all iterations, teardown runs once after all
1432/// iterations complete. Neither is included in timing.
1433///
1434/// # Arguments
1435///
1436/// * `spec` - Benchmark configuration specifying iterations and warmup
1437/// * `setup` - Function that creates the input data (called once, not timed)
1438/// * `f` - Benchmark closure that receives a reference to setup result
1439/// * `teardown` - Function that cleans up the input (called once, not timed)
1440///
1441/// # Example
1442///
1443/// ```ignore
1444/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_teardown};
1445///
1446/// fn setup_db() -> Database { Database::connect("test.db") }
1447/// fn cleanup_db(db: Database) { db.close(); std::fs::remove_file("test.db").ok(); }
1448///
1449/// let spec = BenchSpec::new("db_benchmark", 100, 10)?;
1450/// let report = run_closure_with_setup_teardown(
1451///     spec,
1452///     setup_db,
1453///     |db| { db.query("SELECT *"); Ok(()) },
1454///     cleanup_db,
1455/// )?;
1456/// ```
1457pub fn run_closure_with_setup_teardown<S, T, F, D>(
1458    spec: BenchSpec,
1459    setup: S,
1460    mut f: F,
1461    teardown: D,
1462) -> Result<BenchReport, TimingError>
1463where
1464    S: FnOnce() -> T,
1465    F: FnMut(&T) -> Result<(), TimingError>,
1466    D: FnOnce(T),
1467{
1468    let mut monitor = DefaultResourceMonitor::default();
1469    run_closure_with_setup_teardown_with_monitor(
1470        spec,
1471        &mut monitor,
1472        setup,
1473        move |input| f(input),
1474        teardown,
1475    )
1476}
1477
1478fn run_closure_with_setup_teardown_with_monitor<S, T, F, D, M>(
1479    spec: BenchSpec,
1480    monitor: &mut M,
1481    setup: S,
1482    mut f: F,
1483    teardown: D,
1484) -> Result<BenchReport, TimingError>
1485where
1486    S: FnOnce() -> T,
1487    F: FnMut(&T) -> Result<(), TimingError>,
1488    D: FnOnce(T),
1489    M: ResourceMonitor,
1490{
1491    if spec.iterations == 0 {
1492        return Err(TimingError::NoIterations {
1493            count: spec.iterations,
1494        });
1495    }
1496
1497    reset_semantic_phase_collection();
1498    let harness_origin = Instant::now();
1499    let mut timeline = Vec::new();
1500
1501    // Setup phase - not timed
1502    let setup_start = Instant::now();
1503    let input = setup();
1504    push_timeline_span(
1505        &mut timeline,
1506        harness_origin,
1507        "setup",
1508        setup_start,
1509        Instant::now(),
1510        None,
1511    );
1512
1513    let result = (|| {
1514        // Warmup phase
1515        for iteration in 0..spec.warmup {
1516            let phase_start = Instant::now();
1517            f(&input)?;
1518            push_timeline_span(
1519                &mut timeline,
1520                harness_origin,
1521                "warmup-benchmark",
1522                phase_start,
1523                Instant::now(),
1524                Some(iteration),
1525            );
1526        }
1527
1528        // Measurement phase
1529        begin_semantic_phase_collection();
1530        let mut samples = Vec::with_capacity(spec.iterations as usize);
1531        for iteration in 0..spec.iterations {
1532            let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
1533                Ok(measurement) => measurement,
1534                Err(err) => {
1535                    let _ = finish_semantic_phase_collection();
1536                    return Err(err);
1537                }
1538            };
1539            samples.push(sample);
1540            push_timeline_span(
1541                &mut timeline,
1542                harness_origin,
1543                "measured-benchmark",
1544                start,
1545                end,
1546                Some(iteration),
1547            );
1548        }
1549        let phases = finish_semantic_phase_collection();
1550
1551        Ok((samples, phases))
1552    })();
1553
1554    // Teardown phase - not timed. It runs even when warmup/measurement fails.
1555    let teardown_start = Instant::now();
1556    teardown(input);
1557    push_timeline_span(
1558        &mut timeline,
1559        harness_origin,
1560        "teardown",
1561        teardown_start,
1562        Instant::now(),
1563        None,
1564    );
1565
1566    let (samples, phases) = result?;
1567    Ok(BenchReport {
1568        spec,
1569        samples,
1570        phases,
1571        timeline,
1572    })
1573}
1574
1575#[cfg(test)]
1576mod tests {
1577    use super::*;
1578
    /// Test double for [`ResourceMonitor`] that replays scripted usage
    /// samples and records how many captures were started and finished.
    #[derive(Default)]
    struct FakeResourceMonitor {
        // Scripted per-iteration usage, indexed by capture token.
        samples: Vec<IterationResourceUsage>,
        // Count of `start` calls; doubles as the next token to hand out.
        started: usize,
        // Count of `finish` calls.
        finished: usize,
    }
1585
1586    impl FakeResourceMonitor {
1587        fn new(samples: Vec<IterationResourceUsage>) -> Self {
1588            Self {
1589                samples,
1590                started: 0,
1591                finished: 0,
1592            }
1593        }
1594    }
1595
1596    impl ResourceMonitor for FakeResourceMonitor {
1597        type Token = usize;
1598
1599        fn start(&mut self) -> Self::Token {
1600            let token = self.started;
1601            self.started += 1;
1602            assert!(
1603                token < self.samples.len(),
1604                "resource capture should only run for measured iterations"
1605            );
1606            token
1607        }
1608
1609        fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
1610            self.finished += 1;
1611            self.samples
1612                .get(token)
1613                .cloned()
1614                .expect("resource usage for measured iteration")
1615        }
1616    }
1617
    #[cfg(unix)]
    #[test]
    fn process_cpu_time_snapshot_sums_user_and_kernel_time() {
        // 1.25 s + 0.75 s across the two rusage timevals = 2.0 s total.
        let snapshot = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid snapshot");

        assert_eq!(snapshot.total_ns(), 2_000_000_000);
    }
1635
    #[cfg(unix)]
    #[test]
    fn process_cpu_time_delta_ms_uses_user_and_kernel_time() {
        // Start totals 2.0 s across both timevals.
        let start = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid start snapshot");
        // End totals 1.9 s + 1.4006 s = 3.3006 s, so the delta is ~1.3006 s,
        // reported as 1_301 ms.
        let end = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 900_000,
            },
            libc::timeval {
                tv_sec: 1,
                tv_usec: 400_600,
            },
        )
        .expect("valid end snapshot");

        assert_eq!(process_cpu_delta_ms(start, end), Some(1_301));
    }
1664
1665    #[test]
1666    fn runs_benchmark_collects_requested_samples() {
1667        let spec = BenchSpec::new("noop", 3, 1).unwrap();
1668        let report = run_closure(spec, || Ok(())).unwrap();
1669
1670        assert_eq!(report.samples.len(), 3);
1671        assert_eq!(report.spec.name, "noop");
1672        assert_eq!(report.spec.iterations, 3);
1673    }
1674
1675    #[test]
1676    fn rejects_zero_iterations() {
1677        let result = BenchSpec::new("test", 0, 10);
1678        assert!(matches!(
1679            result,
1680            Err(TimingError::NoIterations { count: 0 })
1681        ));
1682    }
1683
1684    #[test]
1685    fn allows_zero_warmup() {
1686        let spec = BenchSpec::new("test", 5, 0).unwrap();
1687        assert_eq!(spec.warmup, 0);
1688
1689        let report = run_closure(spec, || Ok(())).unwrap();
1690        assert_eq!(report.samples.len(), 5);
1691    }
1692
    #[test]
    fn serializes_to_json() {
        // Hand-built report with one sample, one phase, and one timeline span.
        let report = BenchReport {
            spec: BenchSpec::new("test", 10, 2).unwrap(),
            samples: vec![BenchSample {
                duration_ns: 1_000_000,
                cpu_time_ms: Some(42),
                peak_memory_kb: Some(512),
                process_peak_memory_kb: Some(1536),
            }],
            phases: vec![SemanticPhase {
                name: "prove".to_string(),
                duration_ns: 1_000_000,
            }],
            timeline: vec![HarnessTimelineSpan {
                phase: "measured-benchmark".to_string(),
                start_offset_ns: 0,
                end_offset_ns: 1_000_000,
                iteration: Some(0),
            }],
        };

        // The serialized field names are part of the report format: memory
        // fields keep their current names and the legacy growth field is gone.
        let json = serde_json::to_string(&report).unwrap();
        assert!(json.contains("\"peak_memory_kb\""));
        assert!(json.contains("\"process_peak_memory_kb\""));
        assert!(!json.contains("peak_memory_growth_kb"));
        let restored: BenchReport = serde_json::from_str(&json).unwrap();

        // Round-trip must preserve every field we set above.
        assert_eq!(restored.spec.name, "test");
        assert_eq!(restored.samples.len(), 1);
        assert_eq!(restored.samples[0].cpu_time_ms, Some(42));
        assert_eq!(restored.samples[0].peak_memory_kb, Some(512));
        assert_eq!(restored.samples[0].process_peak_memory_kb, Some(1536));
        assert_eq!(restored.phases.len(), 1);
        assert_eq!(restored.phases[0].name, "prove");
        assert!(restored.phases[0].duration_ns > 0);
    }
1730
1731    #[test]
1732    fn profile_phase_records_only_measured_iterations() {
1733        let spec = BenchSpec::new("semantic", 2, 1).unwrap();
1734        let mut call_index = 0u32;
1735        let report = run_closure(spec, || {
1736            let phase_name = if call_index == 0 {
1737                "warmup-only"
1738            } else {
1739                "prove"
1740            };
1741            call_index += 1;
1742            profile_phase(phase_name, || std::thread::sleep(Duration::from_millis(1)));
1743            Ok(())
1744        })
1745        .unwrap();
1746
1747        assert!(
1748            !report
1749                .phases
1750                .iter()
1751                .any(|phase| phase.name == "warmup-only"),
1752            "warmup phases should not be recorded"
1753        );
1754        let prove = report
1755            .phases
1756            .iter()
1757            .find(|phase| phase.name == "prove")
1758            .expect("prove phase");
1759        assert!(prove.duration_ns > 0);
1760    }
1761
1762    #[test]
1763    fn profile_phase_keeps_the_v1_model_flat() {
1764        let spec = BenchSpec::new("semantic-flat", 1, 0).unwrap();
1765        let report = run_closure(spec, || {
1766            profile_phase("prove", || {
1767                std::thread::sleep(Duration::from_millis(1));
1768                profile_phase("inner", || std::thread::sleep(Duration::from_millis(1)));
1769            });
1770            Ok(())
1771        })
1772        .unwrap();
1773
1774        assert!(report.phases.iter().any(|phase| phase.name == "prove"));
1775        assert!(
1776            !report.phases.iter().any(|phase| phase.name == "inner"),
1777            "nested phases should not create a second flat phase entry"
1778        );
1779    }
1780
1781    #[test]
1782    fn measured_cpu_excludes_warmup_iterations() {
1783        let spec = BenchSpec::new("cpu", 2, 1).unwrap();
1784        let mut monitor = FakeResourceMonitor::new(vec![
1785            IterationResourceUsage {
1786                cpu_time_ms: Some(11),
1787                peak_memory_kb: Some(32),
1788                ..Default::default()
1789            },
1790            IterationResourceUsage {
1791                cpu_time_ms: Some(17),
1792                peak_memory_kb: Some(64),
1793                ..Default::default()
1794            },
1795        ]);
1796        let mut calls = 0_u32;
1797
1798        let report = run_closure_with_monitor(spec, &mut monitor, || {
1799            calls += 1;
1800            Ok(())
1801        })
1802        .unwrap();
1803
1804        assert_eq!(calls, 3);
1805        assert_eq!(monitor.started, 2);
1806        assert_eq!(monitor.finished, 2);
1807        assert_eq!(
1808            report
1809                .samples
1810                .iter()
1811                .map(|sample| sample.cpu_time_ms)
1812                .collect::<Vec<_>>(),
1813            vec![Some(11), Some(17)]
1814        );
1815        assert_eq!(report.cpu_total_ms(), Some(28));
1816    }
1817
1818    #[test]
1819    fn measured_cpu_excludes_outer_harness_and_report_overhead() {
1820        let spec = BenchSpec::new("cpu-harness", 2, 1).unwrap();
1821        let mut monitor = FakeResourceMonitor::new(vec![
1822            IterationResourceUsage {
1823                cpu_time_ms: Some(5),
1824                peak_memory_kb: Some(12),
1825                ..Default::default()
1826            },
1827            IterationResourceUsage {
1828                cpu_time_ms: Some(7),
1829                peak_memory_kb: Some(18),
1830                ..Default::default()
1831            },
1832        ]);
1833
1834        let mut setup_calls = 0_u32;
1835        let mut teardown_calls = 0_u32;
1836        let report = run_closure_with_setup_teardown_with_monitor(
1837            spec,
1838            &mut monitor,
1839            || {
1840                setup_calls += 1;
1841                vec![1_u8, 2, 3]
1842            },
1843            |_fixture| Ok(()),
1844            |_fixture| {
1845                teardown_calls += 1;
1846            },
1847        )
1848        .unwrap();
1849
1850        let _serialized = serde_json::to_string(&report).unwrap();
1851
1852        assert_eq!(setup_calls, 1);
1853        assert_eq!(teardown_calls, 1);
1854        assert_eq!(monitor.started, 2);
1855        assert_eq!(report.cpu_total_ms(), Some(12));
1856        assert_eq!(report.cpu_median_ms(), Some(6));
1857    }
1858
1859    #[test]
1860    fn setup_teardown_runs_teardown_when_warmup_fails() {
1861        let spec = BenchSpec::new("teardown-on-error", 1, 1).unwrap();
1862        let mut teardown_calls = 0_u32;
1863
1864        let result = run_closure_with_setup_teardown(
1865            spec,
1866            || vec![1_u8, 2, 3],
1867            |_fixture| Err(TimingError::Execution("warmup failed".to_string())),
1868            |_fixture| {
1869                teardown_calls += 1;
1870            },
1871        );
1872
1873        assert!(result.is_err());
1874        assert_eq!(teardown_calls, 1);
1875    }
1876
1877    #[test]
1878    fn single_iteration_cpu_median_matches_the_measured_iteration() {
1879        let spec = BenchSpec::new("single", 1, 0).unwrap();
1880        let mut monitor = FakeResourceMonitor::new(vec![IterationResourceUsage {
1881            cpu_time_ms: Some(42),
1882            peak_memory_kb: Some(24),
1883            ..Default::default()
1884        }]);
1885
1886        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();
1887
1888        assert_eq!(report.samples[0].cpu_time_ms, Some(42));
1889        assert_eq!(report.cpu_total_ms(), Some(42));
1890        assert_eq!(report.cpu_median_ms(), Some(42));
1891    }
1892
1893    #[test]
1894    fn multiple_iterations_export_the_median_cpu_sample() {
1895        let spec = BenchSpec::new("median", 3, 0).unwrap();
1896        let mut monitor = FakeResourceMonitor::new(vec![
1897            IterationResourceUsage {
1898                cpu_time_ms: Some(19),
1899                peak_memory_kb: Some(10),
1900                ..Default::default()
1901            },
1902            IterationResourceUsage {
1903                cpu_time_ms: Some(7),
1904                peak_memory_kb: Some(30),
1905                ..Default::default()
1906            },
1907            IterationResourceUsage {
1908                cpu_time_ms: Some(11),
1909                peak_memory_kb: Some(20),
1910                ..Default::default()
1911            },
1912        ]);
1913
1914        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();
1915
1916        assert_eq!(report.cpu_median_ms(), Some(11));
1917        assert_eq!(report.cpu_total_ms(), Some(37));
1918    }
1919
1920    #[test]
1921    fn peak_memory_excludes_harness_baseline_overhead() {
1922        let spec = BenchSpec::new("memory", 2, 1).unwrap();
1923        let mut monitor = FakeResourceMonitor::new(vec![
1924            IterationResourceUsage {
1925                cpu_time_ms: Some(3),
1926                peak_memory_kb: Some(48),
1927                process_peak_memory_kb: Some(1_048),
1928            },
1929            IterationResourceUsage {
1930                cpu_time_ms: Some(4),
1931                peak_memory_kb: Some(96),
1932                process_peak_memory_kb: Some(1_096),
1933            },
1934        ]);
1935
1936        let report = run_closure_with_setup_teardown_with_monitor(
1937            spec,
1938            &mut monitor,
1939            || vec![0_u8; 1024],
1940            |_fixture| Ok(()),
1941            |_fixture| {},
1942        )
1943        .unwrap();
1944
1945        assert_eq!(
1946            report
1947                .samples
1948                .iter()
1949                .map(|sample| sample.peak_memory_kb)
1950                .collect::<Vec<_>>(),
1951            vec![Some(48), Some(96)]
1952        );
1953        assert_eq!(report.peak_memory_kb(), Some(96));
1954        assert_eq!(report.peak_memory_growth_kb(), report.peak_memory_kb());
1955        assert_eq!(report.process_peak_memory_kb(), Some(1_096));
1956    }
1957
1958    #[test]
1959    fn memory_peak_sampler_uses_the_first_post_startup_sample_as_its_baseline() {
1960        use std::collections::VecDeque;
1961        use std::sync::{Arc, Mutex};
1962
1963        // Queue: [80=startup warmup, 100=baseline-on-Begin, 140, 120, ...]
1964        // After exhaustion the reader returns 120 forever, so peak stays 140.
1965        let samples = Arc::new(Mutex::new(VecDeque::from([
1966            Some(80_u64),
1967            Some(100_u64),
1968            Some(140_u64),
1969            Some(120_u64),
1970        ])));
1971        let reader_samples = Arc::clone(&samples);
1972        let reader = Arc::new(move || {
1973            reader_samples
1974                .lock()
1975                .expect("sample queue")
1976                .pop_front()
1977                .unwrap_or(Some(120))
1978        });
1979
1980        let sampler = PersistentMemorySampler::start_with_reader(reader).expect("sampler");
1981        assert!(sampler.begin_window());
1982        let peak = sampler.end_window().expect("peak memory");
1983
1984        assert_eq!(
1985            peak,
1986            ProcessMemoryPeak {
1987                growth_kb: 40,
1988                process_peak_kb: 140,
1989            }
1990        );
1991    }
1992
1993    #[test]
1994    fn persistent_memory_sampler_does_not_queue_result_when_begin_fails() {
1995        use std::collections::VecDeque;
1996        use std::sync::{Arc, Mutex};
1997
1998        // Queue: [80=startup warmup, None=failed first baseline,
1999        // 100=second baseline, 130=final sample].
2000        let samples = Arc::new(Mutex::new(VecDeque::from([
2001            Some(80_u64),
2002            None,
2003            Some(100_u64),
2004            Some(130_u64),
2005        ])));
2006        let reader_samples = Arc::clone(&samples);
2007        let reader = Arc::new(move || {
2008            reader_samples
2009                .lock()
2010                .expect("sample queue")
2011                .pop_front()
2012                .unwrap_or(Some(130))
2013        });
2014
2015        let sampler = PersistentMemorySampler::start_with_reader(reader).expect("sampler");
2016        assert!(!sampler.begin_window());
2017        assert!(sampler.begin_window());
2018        let peak = sampler
2019            .end_window()
2020            .expect("second window should receive its own result");
2021
2022        assert_eq!(
2023            peak,
2024            ProcessMemoryPeak {
2025                growth_kb: 30,
2026                process_peak_kb: 130,
2027            }
2028        );
2029    }
2030
    #[test]
    fn persistent_memory_sampler_waits_for_baseline_before_begin_returns() {
        use std::sync::atomic::{AtomicBool, Ordering};
        use std::sync::{Arc, Mutex};

        // Verifies that begin_window() blocks until the baseline memory read
        // has actually completed, rather than returning while the read is
        // still in flight.
        //
        // Reader call schedule: call 1 is the sampler's startup warmup read;
        // call 2 is the baseline read for begin_window(), which this test
        // deliberately stalls; every later call returns 120.
        let call_count = Arc::new(Mutex::new(0_u32));
        // Handshake channels: the reader announces when the baseline read has
        // started, then blocks until the helper thread sends the release.
        let (baseline_entered_tx, baseline_entered_rx) = mpsc::sync_channel(1);
        let (baseline_release_tx, baseline_release_rx) = mpsc::sync_channel(1);
        let baseline_release_rx = Arc::new(Mutex::new(baseline_release_rx));
        // Flipped by the helper thread just before it unblocks the baseline
        // read; begin_window() must not have returned before this is true.
        let baseline_released = Arc::new(AtomicBool::new(false));

        let reader_calls = Arc::clone(&call_count);
        let reader_release = Arc::clone(&baseline_release_rx);
        let reader = Arc::new(move || {
            let mut calls = reader_calls.lock().expect("call count");
            *calls += 1;
            let current = *calls;
            // Release the counter lock before potentially blocking below.
            drop(calls);

            if current == 2 {
                // Second call = the begin_window() baseline read: announce it,
                // then stall until the helper thread releases it.
                baseline_entered_tx.send(()).expect("baseline entered");
                reader_release
                    .lock()
                    .expect("baseline release")
                    .recv()
                    .expect("release baseline");
                return Some(100);
            }

            Some(120)
        });

        // Helper thread: waits for the baseline read to start, lingers long
        // enough that a non-blocking begin_window() would already have
        // returned, then records the release and unblocks the reader.
        let released = Arc::clone(&baseline_released);
        let release_handle = thread::spawn(move || {
            baseline_entered_rx.recv().expect("baseline read started");
            thread::sleep(Duration::from_millis(20));
            released.store(true, Ordering::SeqCst);
            baseline_release_tx.send(()).expect("release baseline");
        });

        let sampler = PersistentMemorySampler::start_with_reader(reader).expect("sampler");
        assert!(sampler.begin_window());
        assert!(
            baseline_released.load(Ordering::SeqCst),
            "begin_window returned before the baseline sample completed"
        );
        release_handle.join().expect("join release thread");

        // Baseline 100, later samples 120 -> growth 20, raw process peak 120.
        let peak = sampler.end_window().expect("peak memory");
        assert_eq!(peak.growth_kb, 20);
        assert_eq!(peak.process_peak_kb, 120);
    }
2083
2084    #[test]
2085    fn persistent_memory_sampler_supports_multiple_windows() {
2086        use std::collections::VecDeque;
2087        use std::sync::{Arc, Mutex};
2088
2089        // First window baseline=200 peak=260 (growth=60).
2090        // Second window baseline=190 peak=250 (growth=60).
2091        let samples = Arc::new(Mutex::new(VecDeque::from([
2092            Some(50_u64),  // startup warmup
2093            Some(200_u64), // window 1 baseline
2094            Some(260_u64), // window 1 peak
2095            Some(190_u64), // window 2 baseline
2096            Some(250_u64), // window 2 peak
2097        ])));
2098        let reader_samples = Arc::clone(&samples);
2099        let reader = Arc::new(move || {
2100            reader_samples
2101                .lock()
2102                .expect("sample queue")
2103                .pop_front()
2104                .unwrap_or(Some(0))
2105        });
2106
2107        let sampler = PersistentMemorySampler::start_with_reader(reader).expect("sampler");
2108
2109        assert!(sampler.begin_window());
2110        let first = sampler.end_window().expect("first peak");
2111        assert_eq!(first.process_peak_kb, 260);
2112        assert_eq!(first.growth_kb, 60);
2113
2114        assert!(sampler.begin_window());
2115        let second = sampler.end_window().expect("second peak");
2116        assert_eq!(second.process_peak_kb, 250);
2117        assert_eq!(second.growth_kb, 60);
2118    }
2119
2120    #[test]
2121    fn bench_report_deserializes_legacy_payload_without_phases_or_timeline() {
2122        // Wire format produced by mobench <= 0.1.34 (no phases / timeline /
2123        // resource fields). Adding these fields to BenchReport must not
2124        // break consumers that still emit the older shape.
2125        let legacy = r#"{
2126            "spec": { "name": "legacy", "iterations": 2, "warmup": 0 },
2127            "samples": [
2128                { "duration_ns": 100 },
2129                { "duration_ns": 200 }
2130            ]
2131        }"#;
2132
2133        let report: BenchReport = serde_json::from_str(legacy).expect("legacy report parses");
2134        assert_eq!(report.samples.len(), 2);
2135        assert!(report.phases.is_empty());
2136        assert!(report.timeline.is_empty());
2137        assert!(report.samples[0].cpu_time_ms.is_none());
2138        assert!(report.samples[0].peak_memory_kb.is_none());
2139        assert!(report.samples[0].process_peak_memory_kb.is_none());
2140
2141        // Round-trip the parsed report and confirm the empty optional
2142        // collections are skipped from the serialized output.
2143        let json = serde_json::to_string(&report).expect("serialize");
2144        assert!(!json.contains("\"phases\""));
2145        assert!(!json.contains("\"timeline\""));
2146    }
2147
2148    #[test]
2149    fn run_with_setup_calls_setup_once() {
2150        use std::sync::atomic::{AtomicU32, Ordering};
2151
2152        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
2153        static RUN_COUNT: AtomicU32 = AtomicU32::new(0);
2154
2155        let spec = BenchSpec::new("test", 5, 2).unwrap();
2156        let report = run_closure_with_setup(
2157            spec,
2158            || {
2159                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2160                vec![1, 2, 3]
2161            },
2162            |data| {
2163                RUN_COUNT.fetch_add(1, Ordering::SeqCst);
2164                std::hint::black_box(data.len());
2165                Ok(())
2166            },
2167        )
2168        .unwrap();
2169
2170        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1); // Setup called once
2171        assert_eq!(RUN_COUNT.load(Ordering::SeqCst), 7); // 2 warmup + 5 iterations
2172        assert_eq!(report.samples.len(), 5);
2173    }
2174
2175    #[test]
2176    fn run_with_setup_per_iter_calls_setup_each_time() {
2177        use std::sync::atomic::{AtomicU32, Ordering};
2178
2179        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
2180
2181        let spec = BenchSpec::new("test", 3, 1).unwrap();
2182        let report = run_closure_with_setup_per_iter(
2183            spec,
2184            || {
2185                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2186                vec![1, 2, 3]
2187            },
2188            |data| {
2189                std::hint::black_box(data);
2190                Ok(())
2191            },
2192        )
2193        .unwrap();
2194
2195        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 4); // 1 warmup + 3 iterations
2196        assert_eq!(report.samples.len(), 3);
2197    }
2198
2199    #[test]
2200    fn run_with_setup_teardown_calls_both() {
2201        use std::sync::atomic::{AtomicU32, Ordering};
2202
2203        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
2204        static TEARDOWN_COUNT: AtomicU32 = AtomicU32::new(0);
2205
2206        let spec = BenchSpec::new("test", 3, 1).unwrap();
2207        let report = run_closure_with_setup_teardown(
2208            spec,
2209            || {
2210                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2211                "resource"
2212            },
2213            |_resource| Ok(()),
2214            |_resource| {
2215                TEARDOWN_COUNT.fetch_add(1, Ordering::SeqCst);
2216            },
2217        )
2218        .unwrap();
2219
2220        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
2221        assert_eq!(TEARDOWN_COUNT.load(Ordering::SeqCst), 1);
2222        assert_eq!(report.samples.len(), 3);
2223    }
2224
2225    #[test]
2226    fn bench_report_serializes_exact_harness_timeline() {
2227        let spec = BenchSpec::new("timeline", 2, 1).unwrap();
2228        let report = run_closure_with_setup_teardown(
2229            spec,
2230            || {
2231                std::thread::sleep(Duration::from_millis(1));
2232                "resource"
2233            },
2234            |_resource| {
2235                std::thread::sleep(Duration::from_millis(1));
2236                Ok(())
2237            },
2238            |_resource| {
2239                std::thread::sleep(Duration::from_millis(1));
2240            },
2241        )
2242        .unwrap();
2243
2244        let json = serde_json::to_value(&report).unwrap();
2245        assert_eq!(json["timeline"][0]["phase"], "setup");
2246        assert_eq!(json["timeline"][1]["phase"], "warmup-benchmark");
2247        assert_eq!(json["timeline"][2]["phase"], "measured-benchmark");
2248        assert_eq!(json["timeline"][3]["phase"], "measured-benchmark");
2249        assert_eq!(json["timeline"][4]["phase"], "teardown");
2250    }
2251}