// mobench_sdk/timing.rs
1//! Lightweight benchmarking harness for mobile platforms.
2//!
3//! This module provides the core timing infrastructure for the mobench ecosystem.
4//! It was previously a separate crate (`mobench-runner`) but has been consolidated
5//! into `mobench-sdk` for a simpler dependency graph.
6//!
7//! The module is designed to be minimal and portable, with no platform-specific
8//! dependencies, making it suitable for compilation to Android and iOS targets.
9//!
10//! ## Overview
11//!
12//! The timing module executes benchmark functions with:
13//! - Configurable warmup iterations
14//! - Precise nanosecond-resolution timing
15//! - Simple, serializable results
16//!
17//! ## Usage
18//!
19//! Most users should use this via the higher-level [`crate::run_benchmark`] function
20//! or [`crate::BenchmarkBuilder`]. Direct usage is for custom integrations:
21//!
22//! ```
23//! use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
24//!
25//! // Define a benchmark specification
26//! let spec = BenchSpec::new("my_benchmark", 100, 10)?;
27//!
28//! // Run the benchmark
29//! let report = run_closure(spec, || {
30//!     // Your benchmark code
31//!     let sum: u64 = (0..1000).sum();
32//!     std::hint::black_box(sum);
33//!     Ok(())
34//! })?;
35//!
36//! // Analyze results
37//! let mean_ns = report.samples.iter()
38//!     .map(|s| s.duration_ns)
39//!     .sum::<u64>() / report.samples.len() as u64;
40//!
41//! println!("Mean: {} ns", mean_ns);
42//! # Ok::<(), TimingError>(())
43//! ```
44//!
45//! ## Types
46//!
47//! | Type | Description |
48//! |------|-------------|
49//! | [`BenchSpec`] | Benchmark configuration (name, iterations, warmup) |
50//! | [`BenchSample`] | Single timing measurement in nanoseconds |
51//! | [`BenchReport`] | Complete results with all samples |
52//! | [`TimingError`] | Error conditions during benchmarking |
53//!
54//! ## Feature Flags
55//!
56//! This module is always available. When using `mobench-sdk` with default features,
57//! you also get build automation and template generation. For minimal binary size
58//! (e.g., on mobile targets), use the `runner-only` feature:
59//!
60//! ```toml
61//! [dependencies]
62//! mobench-sdk = { version = "0.1", default-features = false, features = ["runner-only"] }
63//! ```
64
65use serde::{Deserialize, Serialize};
66use std::cell::RefCell;
67use std::sync::{
68    Arc,
69    atomic::{AtomicBool, AtomicU64, Ordering},
70    mpsc,
71};
72use std::thread::{self, JoinHandle};
73use std::time::{Duration, Instant};
74use thiserror::Error;
75
76/// Benchmark specification defining what and how to benchmark.
77///
78/// Contains the benchmark name, number of measurement iterations, and
79/// warmup iterations to perform before measuring.
80///
81/// # Example
82///
83/// ```
84/// use mobench_sdk::timing::BenchSpec;
85///
86/// // Create a spec for 100 iterations with 10 warmup runs
87/// let spec = BenchSpec::new("sorting_benchmark", 100, 10)?;
88///
89/// assert_eq!(spec.name, "sorting_benchmark");
90/// assert_eq!(spec.iterations, 100);
91/// assert_eq!(spec.warmup, 10);
92/// # Ok::<(), mobench_sdk::timing::TimingError>(())
93/// ```
94///
95/// # Serialization
96///
97/// `BenchSpec` implements `Serialize` and `Deserialize` for JSON persistence:
98///
99/// ```
100/// use mobench_sdk::timing::BenchSpec;
101///
102/// let spec = BenchSpec {
103///     name: "my_bench".to_string(),
104///     iterations: 50,
105///     warmup: 5,
106/// };
107///
108/// let json = serde_json::to_string(&spec)?;
109/// let restored: BenchSpec = serde_json::from_str(&json)?;
110///
111/// assert_eq!(spec.name, restored.name);
112/// # Ok::<(), serde_json::Error>(())
113/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSpec {
    /// Name of the benchmark, typically the fully-qualified function name.
    ///
    /// Examples: `"my_crate::fibonacci"`, `"sorting_benchmark"`
    pub name: String,

    /// Number of iterations to measure.
    ///
    /// Each iteration produces one [`BenchSample`]. Must be greater than zero.
    /// [`BenchSpec::new`] enforces this; constructing the struct literally
    /// bypasses the check.
    pub iterations: u32,

    /// Number of warmup iterations before measurement.
    ///
    /// Warmup iterations are not recorded. They allow CPU caches to warm
    /// and any JIT compilation to complete. Can be zero.
    pub warmup: u32,
}
132
133impl BenchSpec {
134    /// Creates a new benchmark specification.
135    ///
136    /// # Arguments
137    ///
138    /// * `name` - Name identifier for the benchmark
139    /// * `iterations` - Number of measured iterations (must be > 0)
140    /// * `warmup` - Number of warmup iterations (can be 0)
141    ///
142    /// # Errors
143    ///
144    /// Returns [`TimingError::NoIterations`] if `iterations` is zero.
145    ///
146    /// # Example
147    ///
148    /// ```
149    /// use mobench_sdk::timing::BenchSpec;
150    ///
151    /// let spec = BenchSpec::new("test", 100, 10)?;
152    /// assert_eq!(spec.iterations, 100);
153    ///
154    /// // Zero iterations is an error
155    /// let err = BenchSpec::new("test", 0, 10);
156    /// assert!(err.is_err());
157    /// # Ok::<(), mobench_sdk::timing::TimingError>(())
158    /// ```
159    pub fn new(name: impl Into<String>, iterations: u32, warmup: u32) -> Result<Self, TimingError> {
160        if iterations == 0 {
161            return Err(TimingError::NoIterations { count: iterations });
162        }
163
164        Ok(Self {
165            name: name.into(),
166            iterations,
167            warmup,
168        })
169    }
170}
171
172/// A single timing sample from a benchmark iteration.
173///
174/// Contains the elapsed time in nanoseconds for one execution of the
175/// benchmark function.
176///
177/// # Example
178///
179/// ```
180/// use mobench_sdk::timing::BenchSample;
181///
182/// let sample = BenchSample {
183///     duration_ns: 1_500_000,
184///     ..Default::default()
185/// };
186///
187/// // Convert to milliseconds
188/// let ms = sample.duration_ns as f64 / 1_000_000.0;
189/// assert_eq!(ms, 1.5);
190/// ```
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct BenchSample {
    /// Duration of the iteration in nanoseconds.
    ///
    /// Measured using [`std::time::Instant`] for monotonic, high-resolution timing.
    pub duration_ns: u64,

    /// CPU time consumed by the measured iteration in milliseconds.
    ///
    /// This is captured around the measured benchmark closure only and excludes
    /// warmup, setup, teardown, and report generation overhead.
    /// `None` when the platform cannot report process CPU time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_time_ms: Option<u64>,

    /// Peak memory growth during the measured iteration in kilobytes.
    ///
    /// This legacy wire field is baseline-adjusted immediately before the
    /// measured closure enters. It reports growth during the measured
    /// iteration, not absolute process or device peak memory.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub peak_memory_kb: Option<u64>,

    /// Peak resident memory of the benchmark process during the measured iteration.
    ///
    /// This is sampled from the current process while the measured closure is
    /// running. Unlike `peak_memory_kb`, it is not baseline-adjusted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub process_peak_memory_kb: Option<u64>,
}
220
221impl BenchSample {
222    fn from_measurement(duration: Duration, resources: IterationResourceUsage) -> Self {
223        Self {
224            duration_ns: duration.as_nanos() as u64,
225            cpu_time_ms: resources.cpu_time_ms,
226            peak_memory_kb: resources.peak_memory_kb,
227            process_peak_memory_kb: resources.process_peak_memory_kb,
228        }
229    }
230}
231
232/// Complete benchmark report with all timing samples.
233///
234/// Contains the original specification and all collected samples.
235/// Can be serialized to JSON for storage or transmission.
236///
237/// # Example
238///
239/// ```
240/// use mobench_sdk::timing::{BenchSpec, run_closure};
241///
242/// let spec = BenchSpec::new("example", 50, 5)?;
243/// let report = run_closure(spec, || {
244///     std::hint::black_box(42);
245///     Ok(())
246/// })?;
247///
248/// // Calculate statistics
249/// let samples: Vec<u64> = report.samples.iter()
250///     .map(|s| s.duration_ns)
251///     .collect();
252///
253/// let min = samples.iter().min().unwrap();
254/// let max = samples.iter().max().unwrap();
255/// let mean = samples.iter().sum::<u64>() / samples.len() as u64;
256///
257/// println!("Min: {} ns, Max: {} ns, Mean: {} ns", min, max, mean);
258/// # Ok::<(), mobench_sdk::timing::TimingError>(())
259/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchReport {
    /// The specification used for this benchmark run.
    pub spec: BenchSpec,

    /// All collected timing samples.
    ///
    /// The length equals `spec.iterations`. Samples are in execution order.
    pub samples: Vec<BenchSample>,

    /// Optional semantic phase timings captured during measured iterations.
    ///
    /// Populated by [`profile_phase`] calls made inside the measured closure;
    /// empty when the benchmark recorded no phases.
    pub phases: Vec<SemanticPhase>,

    /// Exact harness timeline spans in execution order.
    ///
    /// Offsets are relative to a single harness origin instant, so spans can
    /// be laid out on one shared timeline.
    pub timeline: Vec<HarnessTimelineSpan>,
}
276
/// One contiguous span on the harness execution timeline (warmup, measured
/// iteration, etc.), expressed as nanosecond offsets from the harness origin.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct HarnessTimelineSpan {
    /// Name of the harness phase this span covers.
    pub phase: String,
    /// Span start, in nanoseconds since the harness origin instant.
    pub start_offset_ns: u64,
    /// Span end, in nanoseconds since the harness origin instant.
    pub end_offset_ns: u64,
    /// Iteration index the span belongs to, if it is iteration-scoped.
    pub iteration: Option<u32>,
}
284
285impl BenchReport {
286    /// Returns the mean (average) duration in nanoseconds.
287    #[must_use]
288    pub fn mean_ns(&self) -> f64 {
289        if self.samples.is_empty() {
290            return 0.0;
291        }
292        let sum: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
293        sum as f64 / self.samples.len() as f64
294    }
295
296    /// Returns the median duration in nanoseconds.
297    #[must_use]
298    pub fn median_ns(&self) -> f64 {
299        if self.samples.is_empty() {
300            return 0.0;
301        }
302        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
303        sorted.sort_unstable();
304        let len = sorted.len();
305        if len % 2 == 0 {
306            (sorted[len / 2 - 1] + sorted[len / 2]) as f64 / 2.0
307        } else {
308            sorted[len / 2] as f64
309        }
310    }
311
312    /// Returns the standard deviation in nanoseconds (sample std dev, n-1).
313    #[must_use]
314    pub fn std_dev_ns(&self) -> f64 {
315        if self.samples.len() < 2 {
316            return 0.0;
317        }
318        let mean = self.mean_ns();
319        let variance: f64 = self
320            .samples
321            .iter()
322            .map(|s| {
323                let diff = s.duration_ns as f64 - mean;
324                diff * diff
325            })
326            .sum::<f64>()
327            / (self.samples.len() - 1) as f64;
328        variance.sqrt()
329    }
330
331    /// Returns the given percentile (0-100) in nanoseconds.
332    #[must_use]
333    pub fn percentile_ns(&self, p: f64) -> f64 {
334        if self.samples.is_empty() {
335            return 0.0;
336        }
337        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
338        sorted.sort_unstable();
339        let p = p.clamp(0.0, 100.0) / 100.0;
340        let index = (p * (sorted.len() - 1) as f64).round() as usize;
341        sorted[index.min(sorted.len() - 1)] as f64
342    }
343
344    /// Returns the minimum duration in nanoseconds.
345    #[must_use]
346    pub fn min_ns(&self) -> u64 {
347        self.samples
348            .iter()
349            .map(|s| s.duration_ns)
350            .min()
351            .unwrap_or(0)
352    }
353
354    /// Returns the maximum duration in nanoseconds.
355    #[must_use]
356    pub fn max_ns(&self) -> u64 {
357        self.samples
358            .iter()
359            .map(|s| s.duration_ns)
360            .max()
361            .unwrap_or(0)
362    }
363
364    /// Returns the total measured CPU time in milliseconds across all iterations.
365    #[must_use]
366    pub fn cpu_total_ms(&self) -> Option<u64> {
367        let values = self
368            .samples
369            .iter()
370            .filter_map(|sample| sample.cpu_time_ms)
371            .collect::<Vec<_>>();
372        if values.is_empty() {
373            return None;
374        }
375
376        let total = values
377            .iter()
378            .fold(0_u128, |sum, value| sum.saturating_add(u128::from(*value)));
379        Some(total.min(u128::from(u64::MAX)) as u64)
380    }
381
382    /// Returns the median measured CPU time in milliseconds across all iterations.
383    #[must_use]
384    pub fn cpu_median_ms(&self) -> Option<u64> {
385        let mut values = self
386            .samples
387            .iter()
388            .filter_map(|sample| sample.cpu_time_ms)
389            .collect::<Vec<_>>();
390        if values.is_empty() {
391            return None;
392        }
393
394        values.sort_unstable();
395        let len = values.len();
396        Some(if len % 2 == 0 {
397            let lower = u128::from(values[(len / 2) - 1]);
398            let upper = u128::from(values[len / 2]);
399            ((lower + upper) / 2) as u64
400        } else {
401            values[len / 2]
402        })
403    }
404
405    /// Returns the maximum baseline-adjusted peak memory growth in kilobytes.
406    ///
407    /// This is the legacy accessor for the serialized `peak_memory_kb` sample
408    /// field. It does not report absolute process or device peak memory.
409    #[must_use]
410    pub fn peak_memory_kb(&self) -> Option<u64> {
411        self.samples
412            .iter()
413            .filter_map(|sample| sample.peak_memory_kb)
414            .max()
415    }
416
417    /// Returns the maximum baseline-adjusted peak memory growth in kilobytes.
418    ///
419    /// This is an explicit alias for [`BenchReport::peak_memory_kb`] to make the
420    /// growth semantics clear while preserving the legacy wire field.
421    #[must_use]
422    pub fn peak_memory_growth_kb(&self) -> Option<u64> {
423        self.peak_memory_kb()
424    }
425
426    /// Returns the maximum process resident memory peak in kilobytes.
427    ///
428    /// This reports the current benchmark process peak sampled during measured
429    /// iterations. It excludes BrowserStack/session-level provider memory.
430    #[must_use]
431    pub fn process_peak_memory_kb(&self) -> Option<u64> {
432        self.samples
433            .iter()
434            .filter_map(|sample| sample.process_peak_memory_kb)
435            .max()
436    }
437
438    /// Returns a statistical summary of the benchmark results.
439    #[must_use]
440    pub fn summary(&self) -> BenchSummary {
441        BenchSummary {
442            name: self.spec.name.clone(),
443            iterations: self.samples.len() as u32,
444            warmup: self.spec.warmup,
445            mean_ns: self.mean_ns(),
446            median_ns: self.median_ns(),
447            std_dev_ns: self.std_dev_ns(),
448            min_ns: self.min_ns(),
449            max_ns: self.max_ns(),
450            p95_ns: self.percentile_ns(95.0),
451            p99_ns: self.percentile_ns(99.0),
452        }
453    }
454}
455
/// Resource usage captured around a single measured iteration by a
/// [`ResourceMonitor`]. All fields are `None` when the platform cannot
/// provide the measurement.
#[derive(Clone, Debug, Default)]
struct IterationResourceUsage {
    // CPU time (user + system) consumed during the iteration, in milliseconds.
    cpu_time_ms: Option<u64>,
    // Baseline-adjusted resident memory growth during the iteration, in KB.
    peak_memory_kb: Option<u64>,
    // Absolute process resident-memory peak sampled during the iteration, in KB.
    process_peak_memory_kb: Option<u64>,
}
462
/// Converts `instant` into a nanosecond offset from `origin`.
///
/// Saturates at `u64::MAX` for offsets too large to represent; an `instant`
/// earlier than `origin` yields `0` (per `Instant::duration_since`).
fn instant_offset_ns(origin: Instant, instant: Instant) -> u64 {
    let elapsed = instant.duration_since(origin).as_nanos();
    elapsed.min(u128::from(u64::MAX)) as u64
}
469
470fn push_timeline_span(
471    timeline: &mut Vec<HarnessTimelineSpan>,
472    origin: Instant,
473    phase: &str,
474    started_at: Instant,
475    ended_at: Instant,
476    iteration: Option<u32>,
477) {
478    timeline.push(HarnessTimelineSpan {
479        phase: phase.to_string(),
480        start_offset_ns: instant_offset_ns(origin, started_at),
481        end_offset_ns: instant_offset_ns(origin, ended_at),
482        iteration,
483    });
484}
485
486/// Statistical summary of benchmark results.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSummary {
    /// Name of the benchmark.
    pub name: String,
    /// Number of measured iterations (sample count in the source report).
    pub iterations: u32,
    /// Number of warmup iterations.
    pub warmup: u32,
    /// Mean duration in nanoseconds.
    pub mean_ns: f64,
    /// Median duration in nanoseconds.
    pub median_ns: f64,
    /// Standard deviation in nanoseconds (sample std dev, n-1).
    pub std_dev_ns: f64,
    /// Minimum duration in nanoseconds.
    pub min_ns: u64,
    /// Maximum duration in nanoseconds.
    pub max_ns: u64,
    /// 95th percentile in nanoseconds.
    pub p95_ns: f64,
    /// 99th percentile in nanoseconds.
    pub p99_ns: f64,
}
510
511/// Flat semantic phase timing captured during a benchmark run.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticPhase {
    /// User-supplied phase name (the first argument to [`profile_phase`]).
    pub name: String,
    /// Total time spent in this phase, summed across all measured iterations,
    /// in nanoseconds (saturating at `u64::MAX`).
    pub duration_ns: u64,
}
517
/// Per-thread accumulator behind [`profile_phase`].
#[derive(Default)]
struct SemanticPhaseCollector {
    // True only between begin_measurement() and finish(); phases recorded
    // outside that window (warmup/setup) are ignored.
    enabled: bool,
    // Current phase nesting depth; only depth-0 phases are recorded (v1 is flat).
    depth: usize,
    // Aggregated phases, one entry per distinct name.
    phases: Vec<SemanticPhase>,
}
524
525impl SemanticPhaseCollector {
526    fn reset(&mut self) {
527        self.enabled = false;
528        self.depth = 0;
529        self.phases.clear();
530    }
531
532    fn begin_measurement(&mut self) {
533        self.reset();
534        self.enabled = true;
535    }
536
537    fn finish(&mut self) -> Vec<SemanticPhase> {
538        self.enabled = false;
539        self.depth = 0;
540        std::mem::take(&mut self.phases)
541    }
542
543    fn enter_phase(&mut self) -> Option<bool> {
544        if !self.enabled {
545            return None;
546        }
547        let top_level = self.depth == 0;
548        self.depth += 1;
549        Some(top_level)
550    }
551
552    fn exit_phase(&mut self, name: &str, top_level: bool, elapsed: Duration) {
553        self.depth = self.depth.saturating_sub(1);
554        if !self.enabled || !top_level {
555            return;
556        }
557
558        let duration_ns = elapsed.as_nanos().min(u128::from(u64::MAX)) as u64;
559        if let Some(phase) = self.phases.iter_mut().find(|phase| phase.name == name) {
560            phase.duration_ns = phase.duration_ns.saturating_add(duration_ns);
561        } else {
562            self.phases.push(SemanticPhase {
563                name: name.to_string(),
564                duration_ns,
565            });
566        }
567    }
568}
569
thread_local! {
    // One collector per thread: `profile_phase` records into the collector of
    // the thread running the measured closure, so concurrent runs cannot mix.
    static SEMANTIC_PHASE_COLLECTOR: RefCell<SemanticPhaseCollector> =
        RefCell::new(SemanticPhaseCollector::default());
}
574
/// RAII guard created by [`profile_phase`]: on drop it reports the elapsed
/// phase time to the thread-local collector.
struct SemanticPhaseGuard {
    // Phase name reported on drop (empty for inert guards).
    name: String,
    // Start time; `None` marks an inert guard that records nothing
    // (used when collection is not active).
    started_at: Option<Instant>,
    // Whether this guard opened a depth-0 (top-level) phase.
    top_level: bool,
}
580
581impl Drop for SemanticPhaseGuard {
582    fn drop(&mut self) {
583        let Some(started_at) = self.started_at else {
584            return;
585        };
586
587        let elapsed = started_at.elapsed();
588        SEMANTIC_PHASE_COLLECTOR.with(|collector| {
589            collector
590                .borrow_mut()
591                .exit_phase(&self.name, self.top_level, elapsed);
592        });
593    }
594}
595
596fn reset_semantic_phase_collection() {
597    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().reset());
598}
599
600fn begin_semantic_phase_collection() {
601    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().begin_measurement());
602}
603
604fn finish_semantic_phase_collection() -> Vec<SemanticPhase> {
605    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().finish())
606}
607
/// Hook for capturing per-iteration resource usage around the measured closure.
///
/// `run_closure_with_monitor` is generic over this trait, allowing alternative
/// monitors to be substituted; [`DefaultResourceMonitor`] is the standard one.
trait ResourceMonitor {
    /// Opaque state captured by [`ResourceMonitor::start`] and consumed by
    /// [`ResourceMonitor::finish`].
    type Token;

    /// Called immediately before the measured closure runs.
    fn start(&mut self) -> Self::Token;

    /// Called after the closure returns; converts the token into usage figures.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage;
}
615
/// Standard [`ResourceMonitor`]: samples process CPU time via `getrusage`
/// (Unix) and resident memory via a background sampler thread.
#[derive(Default)]
struct DefaultResourceMonitor;
618
/// Point-in-time snapshot of the process's cumulative CPU time.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessCpuTimeSnapshot {
    // Cumulative user-mode CPU time, in nanoseconds.
    user_ns: u64,
    // Cumulative kernel-mode CPU time, in nanoseconds.
    system_ns: u64,
}
624
625impl ProcessCpuTimeSnapshot {
626    #[cfg(unix)]
627    fn from_rusage_timevals(user: libc::timeval, system: libc::timeval) -> Option<Self> {
628        Some(Self {
629            user_ns: timeval_to_ns(user)?,
630            system_ns: timeval_to_ns(system)?,
631        })
632    }
633
634    fn total_ns(self) -> u64 {
635        self.user_ns.saturating_add(self.system_ns)
636    }
637}
638
/// State captured by [`DefaultResourceMonitor::start`] and consumed by
/// [`DefaultResourceMonitor::finish`] to compute per-iteration deltas.
struct DefaultResourceToken {
    // CPU-time snapshot at iteration start; `None` if unavailable on this platform.
    cpu_time_start: Option<ProcessCpuTimeSnapshot>,
    // Background memory sampler running for the duration of the iteration.
    memory_sampler: Option<MemoryPeakSampler>,
}
643
644impl ResourceMonitor for DefaultResourceMonitor {
645    type Token = DefaultResourceToken;
646
647    fn start(&mut self) -> Self::Token {
648        Self::Token {
649            cpu_time_start: current_process_cpu_time(),
650            memory_sampler: MemoryPeakSampler::start(),
651        }
652    }
653
654    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
655        let cpu_time_ms = token
656            .cpu_time_start
657            .zip(current_process_cpu_time())
658            .and_then(|(start, end)| process_cpu_delta_ms(start, end));
659
660        let memory_peak = token.memory_sampler.and_then(MemoryPeakSampler::stop);
661
662        IterationResourceUsage {
663            cpu_time_ms,
664            peak_memory_kb: memory_peak
665                .and_then(|peak| (peak.growth_kb > 0).then_some(peak.growth_kb)),
666            process_peak_memory_kb: memory_peak
667                .and_then(|peak| (peak.process_peak_kb > 0).then_some(peak.process_peak_kb)),
668        }
669    }
670}
671
/// Rounds a nanosecond count to the nearest millisecond (ties round up).
///
/// Intermediate math is done in `u128` so `ns + 500_000` cannot overflow.
fn round_ns_to_ms(ns: u64) -> u64 {
    const HALF_MS_NS: u128 = 500_000;
    let rounded = (u128::from(ns) + HALF_MS_NS) / 1_000_000;
    rounded as u64
}
675
676#[cfg(unix)]
677fn process_cpu_delta_ms(start: ProcessCpuTimeSnapshot, end: ProcessCpuTimeSnapshot) -> Option<u64> {
678    Some(round_ns_to_ms(
679        end.total_ns().checked_sub(start.total_ns())?,
680    ))
681}
682
/// Non-Unix fallback: CPU-time measurement is not implemented, so the delta
/// is always `None` and samples carry no `cpu_time_ms`.
#[cfg(not(unix))]
fn process_cpu_delta_ms(
    _start: ProcessCpuTimeSnapshot,
    _end: ProcessCpuTimeSnapshot,
) -> Option<u64> {
    None
}
690
/// Converts a `libc::timeval` (seconds + microseconds) to nanoseconds.
///
/// Returns `None` when either field is negative (fails the unsigned
/// conversion); the multiplication/addition saturate rather than overflow.
#[cfg(unix)]
fn timeval_to_ns(value: libc::timeval) -> Option<u64> {
    let secs = u64::try_from(value.tv_sec).ok()?;
    let micros = u64::try_from(value.tv_usec).ok()?;
    Some(
        secs.saturating_mul(1_000_000_000)
            .saturating_add(micros.saturating_mul(1_000)),
    )
}
700
/// Reads the process's cumulative CPU time via `getrusage(RUSAGE_SELF)`.
/// Returns `None` when the syscall fails or the timevals cannot be converted.
#[cfg(unix)]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::uninit();
    // SAFETY: `getrusage` writes a full `rusage` through the valid pointer on success.
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }

    // SAFETY: rc == 0 means the kernel fully initialized `usage`.
    let usage = unsafe { usage.assume_init() };
    ProcessCpuTimeSnapshot::from_rusage_timevals(usage.ru_utime, usage.ru_stime)
}
712
/// Non-Unix fallback: no CPU-time source is available.
#[cfg(not(unix))]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    None
}
717
/// Polling interval of the background memory sampler thread.
const MEMORY_SAMPLER_INTERVAL: Duration = Duration::from_millis(1);
/// Injectable reader returning the current process resident memory in KB
/// (`None` when unavailable); allows tests to supply a deterministic source.
type MemoryReader = Arc<dyn Fn() -> Option<u64> + Send + Sync + 'static>;
720
/// Result of a [`MemoryPeakSampler`] run.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessMemoryPeak {
    // Peak resident memory minus the pre-iteration baseline, in KB (saturating at 0).
    growth_kb: u64,
    // Absolute peak resident memory observed during the iteration, in KB.
    process_peak_kb: u64,
}
726
/// Background thread that polls process resident memory while a measured
/// iteration runs, tracking the maximum observed value.
struct MemoryPeakSampler {
    // Resident memory captured just before the measured closure entered, in KB.
    baseline_kb: u64,
    // Set to true to ask the sampler thread to exit.
    stop_flag: Arc<AtomicBool>,
    // Maximum resident memory observed so far, in KB; shared with the thread.
    peak_kb: Arc<AtomicU64>,
    // Handle joined by `stop` to ensure the final sample is taken.
    handle: JoinHandle<()>,
}
733
impl MemoryPeakSampler {
    /// Starts a sampler over the real process-memory reader.
    /// Returns `None` when sampling cannot be set up on this platform.
    fn start() -> Option<Self> {
        Self::start_with_reader(Arc::new(|| current_process_memory_kb()))
    }

    /// Starts a sampler with an injectable memory reader.
    ///
    /// Uses a two-step channel handshake so sampler-thread startup cost is not
    /// attributed to the benchmark:
    /// 1. the thread warms up (one reader call) and signals `ready`;
    /// 2. the caller then captures the baseline and sends it to the thread,
    ///    which seeds the shared peak with it and begins polling.
    ///
    /// Returns `None` if the thread cannot be spawned, the reader yields no
    /// value for the baseline, or either handshake channel fails; in every
    /// failure path the thread is stopped and joined before returning.
    fn start_with_reader(reader: MemoryReader) -> Option<Self> {
        let stop_flag = Arc::new(AtomicBool::new(false));
        let peak_kb = Arc::new(AtomicU64::new(0));
        let (ready_tx, ready_rx) = mpsc::sync_channel(1);
        let (baseline_tx, baseline_rx) = mpsc::sync_channel(1);
        let sampler_stop = Arc::clone(&stop_flag);
        let sampler_peak = Arc::clone(&peak_kb);
        let sampler_reader = Arc::clone(&reader);

        let handle = thread::Builder::new()
            .name("mobench-memory-sampler".to_string())
            .spawn(move || {
                // Touch the sampler thread's own stack and runtime state before the
                // benchmark baseline is captured so its overhead is not reported as
                // measured benchmark memory.
                let _ = sampler_reader();
                let _ = ready_tx.send(());

                // Wait for the caller-captured baseline; a closed channel or a
                // `None` baseline means setup failed and the thread just exits.
                let Some(baseline_kb) = baseline_rx.recv().ok().flatten() else {
                    return;
                };
                sampler_peak.store(baseline_kb, Ordering::Release);

                // Poll until asked to stop, folding each reading into the max.
                while !sampler_stop.load(Ordering::Acquire) {
                    if let Some(current_kb) = sampler_reader() {
                        update_atomic_max(&sampler_peak, current_kb);
                    }
                    thread::sleep(MEMORY_SAMPLER_INTERVAL);
                }

                // One final reading after the stop request so a late spike
                // between the last poll and `stop` is still captured.
                if let Some(current_kb) = sampler_reader() {
                    update_atomic_max(&sampler_peak, current_kb);
                }
            })
            .ok()?;

        if ready_rx.recv().is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        // Capture the baseline only after the sampler thread is warmed up.
        let baseline_kb = match reader() {
            Some(value) => value,
            None => {
                let _ = baseline_tx.send(None);
                stop_flag.store(true, Ordering::Release);
                let _ = handle.join();
                return None;
            }
        };
        if baseline_tx.send(Some(baseline_kb)).is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        Some(Self {
            baseline_kb,
            stop_flag,
            peak_kb,
            handle,
        })
    }

    /// Stops the sampler thread, joins it (guaranteeing the final sample has
    /// been folded in), and returns the observed peak and baseline-adjusted growth.
    fn stop(self) -> Option<ProcessMemoryPeak> {
        self.stop_flag.store(true, Ordering::Release);
        let _ = self.handle.join();
        let peak_kb = self.peak_kb.load(Ordering::Acquire);
        Some(ProcessMemoryPeak {
            growth_kb: peak_kb.saturating_sub(self.baseline_kb),
            process_peak_kb: peak_kb,
        })
    }
}
814
/// Atomically raises `target` to `value` if `value` is larger.
///
/// Replaces the previous hand-rolled `compare_exchange_weak` loop with the
/// standard [`AtomicU64::fetch_max`], which performs the same relaxed
/// read-modify-write max operation.
fn update_atomic_max(target: &AtomicU64, value: u64) {
    target.fetch_max(value, Ordering::Relaxed);
}
824
/// Reads the process's resident set size in KB from `/proc/self/statm`
/// (second whitespace-separated field = resident pages, multiplied by the
/// system page size). Returns `None` on any read/parse/sysconf failure.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn current_process_memory_kb() -> Option<u64> {
    let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
    let resident_pages = statm
        .split_whitespace()
        .nth(1)
        .and_then(|value| value.parse::<u64>().ok())?;
    // SAFETY: sysconf takes no pointers and is always safe to call; a
    // non-positive result signals failure and is handled below.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if page_size <= 0 {
        return None;
    }
    let page_size = u64::try_from(page_size).ok()?;
    Some(resident_pages.saturating_mul(page_size) / 1024)
}
839
/// Reads the process's resident memory in KB via the Mach `task_info`
/// (`MACH_TASK_BASIC_INFO`) interface. Returns `None` when the call fails.
#[cfg(any(target_os = "ios", target_os = "macos"))]
fn current_process_memory_kb() -> Option<u64> {
    let mut info = std::mem::MaybeUninit::<libc::mach_task_basic_info_data_t>::uninit();
    let mut count = libc::MACH_TASK_BASIC_INFO_COUNT;
    // `mach_task_self` is deprecated in libc but remains the canonical way to
    // name the current task for task_info.
    #[allow(deprecated)]
    // SAFETY: `task_info` writes at most `count` integers into the buffer,
    // which is sized for `mach_task_basic_info_data_t`; success is checked below.
    let rc = unsafe {
        libc::task_info(
            libc::mach_task_self(),
            libc::MACH_TASK_BASIC_INFO,
            info.as_mut_ptr().cast::<libc::integer_t>(),
            &mut count,
        )
    };
    if rc != libc::KERN_SUCCESS {
        return None;
    }

    // SAFETY: KERN_SUCCESS guarantees `info` was initialized by the kernel.
    let info = unsafe { info.assume_init() };
    Some((info.resident_size / 1024) as u64)
}
860
/// Fallback for platforms with no supported memory reader: sampling is
/// disabled and samples carry no memory fields.
#[cfg(not(any(
    target_os = "android",
    target_os = "linux",
    target_os = "ios",
    target_os = "macos"
)))]
fn current_process_memory_kb() -> Option<u64> {
    None
}
870
871fn measure_iteration<M, F>(
872    monitor: &mut M,
873    f: F,
874) -> Result<(BenchSample, Instant, Instant), TimingError>
875where
876    M: ResourceMonitor,
877    F: FnOnce() -> Result<(), TimingError>,
878{
879    let token = monitor.start();
880    let started_at = Instant::now();
881    let result = f();
882    let ended_at = Instant::now();
883    let resources = monitor.finish(token);
884    result.map(|_| {
885        (
886            BenchSample::from_measurement(ended_at.duration_since(started_at), resources),
887            started_at,
888            ended_at,
889        )
890    })
891}
892
893/// Records a flat semantic phase when called inside an active benchmark measurement loop.
894///
895/// Phases are aggregated across measured iterations and ignored during warmup/setup.
896/// Nested phases are intentionally collapsed in v1 to keep the output flat.
897pub fn profile_phase<T>(name: &str, f: impl FnOnce() -> T) -> T {
898    let guard = SEMANTIC_PHASE_COLLECTOR.with(|collector| {
899        let mut collector = collector.borrow_mut();
900        match collector.enter_phase() {
901            Some(top_level) => SemanticPhaseGuard {
902                name: name.to_string(),
903                started_at: Some(Instant::now()),
904                top_level,
905            },
906            None => SemanticPhaseGuard {
907                name: String::new(),
908                started_at: None,
909                top_level: false,
910            },
911        }
912    });
913
914    let result = f();
915    drop(guard);
916    result
917}
918
919/// Errors that can occur during benchmark execution.
920///
921/// # Example
922///
923/// ```
924/// use mobench_sdk::timing::{BenchSpec, TimingError};
925///
926/// // Zero iterations produces an error
927/// let result = BenchSpec::new("test", 0, 10);
928/// assert!(matches!(result, Err(TimingError::NoIterations { .. })));
929/// ```
#[derive(Debug, Error)]
pub enum TimingError {
    /// The iteration count was zero or invalid.
    ///
    /// At least one iteration is required to produce a measurement.
    /// The error includes the actual value provided for diagnostic purposes.
    #[error("iterations must be greater than zero (got {count}). Minimum recommended: 10")]
    NoIterations {
        /// The invalid iteration count that was provided.
        count: u32,
    },

    /// The benchmark function failed during execution.
    ///
    /// Contains a description of the failure. Returned verbatim when the
    /// benchmark closure yields an error; the run stops at that iteration.
    #[error("benchmark function failed: {0}")]
    Execution(String),
}
948
949/// Runs a benchmark by executing a closure repeatedly.
950///
951/// This is the core benchmarking function. It:
952///
953/// 1. Executes the closure `spec.warmup` times without recording
954/// 2. Executes the closure `spec.iterations` times, recording each duration
955/// 3. Returns a [`BenchReport`] with all samples
956///
957/// # Arguments
958///
959/// * `spec` - Benchmark configuration specifying iterations and warmup
960/// * `f` - Closure to benchmark; must return `Result<(), TimingError>`
961///
962/// # Returns
963///
964/// A [`BenchReport`] containing all timing samples, or a [`TimingError`] if
965/// the benchmark fails.
966///
967/// # Example
968///
969/// ```
970/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
971///
972/// let spec = BenchSpec::new("sum_benchmark", 100, 10)?;
973///
974/// let report = run_closure(spec, || {
975///     let sum: u64 = (0..1000).sum();
976///     std::hint::black_box(sum);
977///     Ok(())
978/// })?;
979///
980/// assert_eq!(report.samples.len(), 100);
981///
982/// // Calculate mean duration
983/// let total_ns: u64 = report.samples.iter().map(|s| s.duration_ns).sum();
984/// let mean_ns = total_ns / report.samples.len() as u64;
985/// println!("Mean: {} ns", mean_ns);
986/// # Ok::<(), TimingError>(())
987/// ```
988///
989/// # Error Handling
990///
991/// If the closure returns an error, the benchmark stops immediately:
992///
993/// ```
994/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
995///
996/// let spec = BenchSpec::new("failing_bench", 100, 0)?;
997///
998/// let result = run_closure(spec, || {
999///     Err(TimingError::Execution("simulated failure".into()))
1000/// });
1001///
1002/// assert!(result.is_err());
1003/// # Ok::<(), TimingError>(())
1004/// ```
1005///
1006/// # Timing Precision
1007///
1008/// Uses [`std::time::Instant`] for timing, which provides monotonic,
1009/// nanosecond-resolution measurements on most platforms.
1010pub fn run_closure<F>(spec: BenchSpec, mut f: F) -> Result<BenchReport, TimingError>
1011where
1012    F: FnMut() -> Result<(), TimingError>,
1013{
1014    let mut monitor = DefaultResourceMonitor;
1015    run_closure_with_monitor(spec, &mut monitor, move || f())
1016}
1017
1018fn run_closure_with_monitor<F, M>(
1019    spec: BenchSpec,
1020    monitor: &mut M,
1021    mut f: F,
1022) -> Result<BenchReport, TimingError>
1023where
1024    F: FnMut() -> Result<(), TimingError>,
1025    M: ResourceMonitor,
1026{
1027    if spec.iterations == 0 {
1028        return Err(TimingError::NoIterations {
1029            count: spec.iterations,
1030        });
1031    }
1032
1033    reset_semantic_phase_collection();
1034    let harness_origin = Instant::now();
1035    let mut timeline = Vec::new();
1036
1037    // Warmup phase - not measured
1038    for iteration in 0..spec.warmup {
1039        let phase_start = Instant::now();
1040        f()?;
1041        push_timeline_span(
1042            &mut timeline,
1043            harness_origin,
1044            "warmup-benchmark",
1045            phase_start,
1046            Instant::now(),
1047            Some(iteration),
1048        );
1049    }
1050
1051    // Measurement phase
1052    begin_semantic_phase_collection();
1053    let mut samples = Vec::with_capacity(spec.iterations as usize);
1054    for iteration in 0..spec.iterations {
1055        let (sample, start, end) = match measure_iteration(monitor, || f()) {
1056            Ok(measurement) => measurement,
1057            Err(err) => {
1058                let _ = finish_semantic_phase_collection();
1059                return Err(err);
1060            }
1061        };
1062        samples.push(sample);
1063        push_timeline_span(
1064            &mut timeline,
1065            harness_origin,
1066            "measured-benchmark",
1067            start,
1068            end,
1069            Some(iteration),
1070        );
1071    }
1072    let phases = finish_semantic_phase_collection();
1073
1074    Ok(BenchReport {
1075        spec,
1076        samples,
1077        phases,
1078        timeline,
1079    })
1080}
1081
1082/// Runs a benchmark with setup that executes once before all iterations.
1083///
1084/// The setup function is called once before timing begins, then the benchmark
1085/// runs multiple times using a reference to the setup result. This is useful
1086/// for expensive initialization that shouldn't be included in timing.
1087///
1088/// # Arguments
1089///
1090/// * `spec` - Benchmark configuration specifying iterations and warmup
1091/// * `setup` - Function that creates the input data (called once, not timed)
1092/// * `f` - Benchmark closure that receives a reference to setup result
1093///
1094/// # Example
1095///
1096/// ```ignore
1097/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup};
1098///
1099/// fn setup_data() -> Vec<u8> {
1100///     vec![0u8; 1_000_000]  // Expensive allocation not measured
1101/// }
1102///
1103/// let spec = BenchSpec::new("hash_benchmark", 100, 10)?;
1104/// let report = run_closure_with_setup(spec, setup_data, |data| {
1105///     std::hint::black_box(compute_hash(data));
1106///     Ok(())
1107/// })?;
1108/// ```
1109pub fn run_closure_with_setup<S, T, F>(
1110    spec: BenchSpec,
1111    setup: S,
1112    mut f: F,
1113) -> Result<BenchReport, TimingError>
1114where
1115    S: FnOnce() -> T,
1116    F: FnMut(&T) -> Result<(), TimingError>,
1117{
1118    let mut monitor = DefaultResourceMonitor;
1119    run_closure_with_setup_with_monitor(spec, &mut monitor, setup, move |input| f(input))
1120}
1121
/// Monitored core of [`run_closure_with_setup`]: runs `setup` once (recorded
/// on the timeline, never in samples), then warmup and measured iterations
/// against a shared reference to the fixture.
///
/// NOTE: statement order here is deliberate — each `Instant::now()` call
/// brackets exactly the work attributed to its timeline span.
fn run_closure_with_setup_with_monitor<S, T, F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnOnce() -> T,
    F: FnMut(&T) -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    // Reject before running setup, so no fixture is built for a doomed run.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // Setup phase - not timed
    let setup_start = Instant::now();
    let input = setup();
    push_timeline_span(
        &mut timeline,
        harness_origin,
        "setup",
        setup_start,
        Instant::now(),
        None,
    );

    // Warmup phase - not recorded
    for iteration in 0..spec.warmup {
        let phase_start = Instant::now();
        f(&input)?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    // Measurement phase
    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Close out phase collection so a later run starts clean;
                // the partial result is deliberately discarded.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1199
1200/// Runs a benchmark with per-iteration setup.
1201///
1202/// Setup runs before each iteration and is not timed. The benchmark takes
1203/// ownership of the setup result, making this suitable for benchmarks that
1204/// mutate their input (e.g., sorting).
1205///
1206/// # Arguments
1207///
1208/// * `spec` - Benchmark configuration specifying iterations and warmup
1209/// * `setup` - Function that creates fresh input for each iteration (not timed)
1210/// * `f` - Benchmark closure that takes ownership of setup result
1211///
1212/// # Example
1213///
1214/// ```ignore
1215/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_per_iter};
1216///
1217/// fn generate_random_vec() -> Vec<i32> {
1218///     (0..1000).map(|_| rand::random()).collect()
1219/// }
1220///
1221/// let spec = BenchSpec::new("sort_benchmark", 100, 10)?;
1222/// let report = run_closure_with_setup_per_iter(spec, generate_random_vec, |mut data| {
1223///     data.sort();
1224///     std::hint::black_box(data);
1225///     Ok(())
1226/// })?;
1227/// ```
1228pub fn run_closure_with_setup_per_iter<S, T, F>(
1229    spec: BenchSpec,
1230    mut setup: S,
1231    mut f: F,
1232) -> Result<BenchReport, TimingError>
1233where
1234    S: FnMut() -> T,
1235    F: FnMut(T) -> Result<(), TimingError>,
1236{
1237    let mut monitor = DefaultResourceMonitor;
1238    run_closure_with_setup_per_iter_with_monitor(
1239        spec,
1240        &mut monitor,
1241        move || setup(),
1242        move |input| f(input),
1243    )
1244}
1245
/// Monitored core of [`run_closure_with_setup_per_iter`]: rebuilds the
/// fixture before every warmup and measured iteration so the benchmark may
/// consume it by value (e.g. in-place sorts).
///
/// NOTE: statement order is deliberate — each `Instant::now()` call brackets
/// exactly the work attributed to its timeline span, and fixture setup is
/// always recorded as a separate "fixture-setup" span, never in samples.
fn run_closure_with_setup_per_iter_with_monitor<S, T, F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    mut setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnMut() -> T,
    F: FnMut(T) -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    // At least one measured iteration is required to produce a report.
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // Warmup phase
    for iteration in 0..spec.warmup {
        let setup_start = Instant::now();
        let input = setup();
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "fixture-setup",
            setup_start,
            Instant::now(),
            Some(iteration),
        );
        let phase_start = Instant::now();
        f(input)?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    // Measurement phase
    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let setup_start = Instant::now();
        let input = setup(); // Not timed
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "fixture-setup",
            setup_start,
            Instant::now(),
            Some(iteration),
        );

        let (sample, start, end) = match measure_iteration(monitor, || f(input)) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Leave phase collection clean for the next run, then
                // propagate the iteration failure.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1332
1333/// Runs a benchmark with setup and teardown.
1334///
1335/// Setup runs once before all iterations, teardown runs once after all
1336/// iterations complete. Neither is included in timing.
1337///
1338/// # Arguments
1339///
1340/// * `spec` - Benchmark configuration specifying iterations and warmup
1341/// * `setup` - Function that creates the input data (called once, not timed)
1342/// * `f` - Benchmark closure that receives a reference to setup result
1343/// * `teardown` - Function that cleans up the input (called once, not timed)
1344///
1345/// # Example
1346///
1347/// ```ignore
1348/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_teardown};
1349///
1350/// fn setup_db() -> Database { Database::connect("test.db") }
1351/// fn cleanup_db(db: Database) { db.close(); std::fs::remove_file("test.db").ok(); }
1352///
1353/// let spec = BenchSpec::new("db_benchmark", 100, 10)?;
1354/// let report = run_closure_with_setup_teardown(
1355///     spec,
1356///     setup_db,
1357///     |db| { db.query("SELECT *"); Ok(()) },
1358///     cleanup_db,
1359/// )?;
1360/// ```
1361pub fn run_closure_with_setup_teardown<S, T, F, D>(
1362    spec: BenchSpec,
1363    setup: S,
1364    mut f: F,
1365    teardown: D,
1366) -> Result<BenchReport, TimingError>
1367where
1368    S: FnOnce() -> T,
1369    F: FnMut(&T) -> Result<(), TimingError>,
1370    D: FnOnce(T),
1371{
1372    let mut monitor = DefaultResourceMonitor;
1373    run_closure_with_setup_teardown_with_monitor(
1374        spec,
1375        &mut monitor,
1376        setup,
1377        move |input| f(input),
1378        teardown,
1379    )
1380}
1381
1382fn run_closure_with_setup_teardown_with_monitor<S, T, F, D, M>(
1383    spec: BenchSpec,
1384    monitor: &mut M,
1385    setup: S,
1386    mut f: F,
1387    teardown: D,
1388) -> Result<BenchReport, TimingError>
1389where
1390    S: FnOnce() -> T,
1391    F: FnMut(&T) -> Result<(), TimingError>,
1392    D: FnOnce(T),
1393    M: ResourceMonitor,
1394{
1395    if spec.iterations == 0 {
1396        return Err(TimingError::NoIterations {
1397            count: spec.iterations,
1398        });
1399    }
1400
1401    reset_semantic_phase_collection();
1402    let harness_origin = Instant::now();
1403    let mut timeline = Vec::new();
1404
1405    // Setup phase - not timed
1406    let setup_start = Instant::now();
1407    let input = setup();
1408    push_timeline_span(
1409        &mut timeline,
1410        harness_origin,
1411        "setup",
1412        setup_start,
1413        Instant::now(),
1414        None,
1415    );
1416
1417    // Warmup phase
1418    for iteration in 0..spec.warmup {
1419        let phase_start = Instant::now();
1420        f(&input)?;
1421        push_timeline_span(
1422            &mut timeline,
1423            harness_origin,
1424            "warmup-benchmark",
1425            phase_start,
1426            Instant::now(),
1427            Some(iteration),
1428        );
1429    }
1430
1431    // Measurement phase
1432    begin_semantic_phase_collection();
1433    let mut samples = Vec::with_capacity(spec.iterations as usize);
1434    for iteration in 0..spec.iterations {
1435        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
1436            Ok(measurement) => measurement,
1437            Err(err) => {
1438                let _ = finish_semantic_phase_collection();
1439                return Err(err);
1440            }
1441        };
1442        samples.push(sample);
1443        push_timeline_span(
1444            &mut timeline,
1445            harness_origin,
1446            "measured-benchmark",
1447            start,
1448            end,
1449            Some(iteration),
1450        );
1451    }
1452    let phases = finish_semantic_phase_collection();
1453
1454    // Teardown phase - not timed
1455    let teardown_start = Instant::now();
1456    teardown(input);
1457    push_timeline_span(
1458        &mut timeline,
1459        harness_origin,
1460        "teardown",
1461        teardown_start,
1462        Instant::now(),
1463        None,
1464    );
1465
1466    Ok(BenchReport {
1467        spec,
1468        samples,
1469        phases,
1470        timeline,
1471    })
1472}
1473
#[cfg(test)]
mod tests {
    use super::*;

    /// Scripted [`ResourceMonitor`] that replays pre-baked usage samples and
    /// counts `start`/`finish` calls, letting tests assert that resource
    /// capture happens exactly once per *measured* iteration (never for
    /// warmup, setup, or teardown).
    #[derive(Default)]
    struct FakeResourceMonitor {
        samples: Vec<IterationResourceUsage>,
        started: usize,
        finished: usize,
    }

    impl FakeResourceMonitor {
        fn new(samples: Vec<IterationResourceUsage>) -> Self {
            Self {
                samples,
                started: 0,
                finished: 0,
            }
        }
    }

    // The token is simply the index of the next scripted sample; `start`
    // panics if the harness captures more iterations than were scripted.
    impl ResourceMonitor for FakeResourceMonitor {
        type Token = usize;

        fn start(&mut self) -> Self::Token {
            let token = self.started;
            self.started += 1;
            assert!(
                token < self.samples.len(),
                "resource capture should only run for measured iterations"
            );
            token
        }

        fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
            self.finished += 1;
            self.samples
                .get(token)
                .cloned()
                .expect("resource usage for measured iteration")
        }
    }

    #[cfg(unix)]
    #[test]
    fn process_cpu_time_snapshot_sums_user_and_kernel_time() {
        // user 1.25 s + kernel 0.75 s = 2.0 s total.
        let snapshot = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid snapshot");

        assert_eq!(snapshot.total_ns(), 2_000_000_000);
    }

    #[cfg(unix)]
    #[test]
    fn process_cpu_time_delta_ms_uses_user_and_kernel_time() {
        let start = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid start snapshot");
        let end = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 900_000,
            },
            libc::timeval {
                tv_sec: 1,
                tv_usec: 400_600,
            },
        )
        .expect("valid end snapshot");

        // start total = 2.0 s, end total = 3.3006 s; the 1.3006 s delta is
        // reported as 1_301 ms (rounded, not truncated).
        assert_eq!(process_cpu_delta_ms(start, end), Some(1_301));
    }

    #[test]
    fn runs_benchmark_collects_requested_samples() {
        let spec = BenchSpec::new("noop", 3, 1).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();

        assert_eq!(report.samples.len(), 3);
        assert_eq!(report.spec.name, "noop");
        assert_eq!(report.spec.iterations, 3);
    }

    #[test]
    fn rejects_zero_iterations() {
        let result = BenchSpec::new("test", 0, 10);
        assert!(matches!(
            result,
            Err(TimingError::NoIterations { count: 0 })
        ));
    }

    #[test]
    fn allows_zero_warmup() {
        let spec = BenchSpec::new("test", 5, 0).unwrap();
        assert_eq!(spec.warmup, 0);

        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(report.samples.len(), 5);
    }

    #[test]
    fn serializes_to_json() {
        let report = BenchReport {
            spec: BenchSpec::new("test", 10, 2).unwrap(),
            samples: vec![BenchSample {
                duration_ns: 1_000_000,
                cpu_time_ms: Some(42),
                peak_memory_kb: Some(512),
                process_peak_memory_kb: Some(1536),
            }],
            phases: vec![SemanticPhase {
                name: "prove".to_string(),
                duration_ns: 1_000_000,
            }],
            timeline: vec![HarnessTimelineSpan {
                phase: "measured-benchmark".to_string(),
                start_offset_ns: 0,
                end_offset_ns: 1_000_000,
                iteration: Some(0),
            }],
        };

        let json = serde_json::to_string(&report).unwrap();
        // Field-name checks pin the wire format: the legacy
        // `peak_memory_growth_kb` key must not reappear in serialized output.
        assert!(json.contains("\"peak_memory_kb\""));
        assert!(json.contains("\"process_peak_memory_kb\""));
        assert!(!json.contains("peak_memory_growth_kb"));
        let restored: BenchReport = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.spec.name, "test");
        assert_eq!(restored.samples.len(), 1);
        assert_eq!(restored.samples[0].cpu_time_ms, Some(42));
        assert_eq!(restored.samples[0].peak_memory_kb, Some(512));
        assert_eq!(restored.samples[0].process_peak_memory_kb, Some(1536));
        assert_eq!(restored.phases.len(), 1);
        assert_eq!(restored.phases[0].name, "prove");
        assert!(restored.phases[0].duration_ns > 0);
    }

    #[test]
    fn profile_phase_records_only_measured_iterations() {
        // Warmup runs once (call_index 0) with a distinct phase name so the
        // test can detect whether warmup phases leak into the report.
        let spec = BenchSpec::new("semantic", 2, 1).unwrap();
        let mut call_index = 0u32;
        let report = run_closure(spec, || {
            let phase_name = if call_index == 0 {
                "warmup-only"
            } else {
                "prove"
            };
            call_index += 1;
            profile_phase(phase_name, || std::thread::sleep(Duration::from_millis(1)));
            Ok(())
        })
        .unwrap();

        assert!(
            !report
                .phases
                .iter()
                .any(|phase| phase.name == "warmup-only"),
            "warmup phases should not be recorded"
        );
        let prove = report
            .phases
            .iter()
            .find(|phase| phase.name == "prove")
            .expect("prove phase");
        assert!(prove.duration_ns > 0);
    }

    #[test]
    fn profile_phase_keeps_the_v1_model_flat() {
        let spec = BenchSpec::new("semantic-flat", 1, 0).unwrap();
        let report = run_closure(spec, || {
            profile_phase("prove", || {
                std::thread::sleep(Duration::from_millis(1));
                profile_phase("inner", || std::thread::sleep(Duration::from_millis(1)));
            });
            Ok(())
        })
        .unwrap();

        assert!(report.phases.iter().any(|phase| phase.name == "prove"));
        assert!(
            !report.phases.iter().any(|phase| phase.name == "inner"),
            "nested phases should not create a second flat phase entry"
        );
    }

    #[test]
    fn measured_cpu_excludes_warmup_iterations() {
        // 1 warmup + 2 measured: the closure runs 3 times but the monitor
        // must only be engaged for the 2 measured iterations.
        let spec = BenchSpec::new("cpu", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(11),
                peak_memory_kb: Some(32),
                ..Default::default()
            },
            IterationResourceUsage {
                cpu_time_ms: Some(17),
                peak_memory_kb: Some(64),
                ..Default::default()
            },
        ]);
        let mut calls = 0_u32;

        let report = run_closure_with_monitor(spec, &mut monitor, || {
            calls += 1;
            Ok(())
        })
        .unwrap();

        assert_eq!(calls, 3);
        assert_eq!(monitor.started, 2);
        assert_eq!(monitor.finished, 2);
        assert_eq!(
            report
                .samples
                .iter()
                .map(|sample| sample.cpu_time_ms)
                .collect::<Vec<_>>(),
            vec![Some(11), Some(17)]
        );
        assert_eq!(report.cpu_total_ms(), Some(28));
    }

    #[test]
    fn measured_cpu_excludes_outer_harness_and_report_overhead() {
        let spec = BenchSpec::new("cpu-harness", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(5),
                peak_memory_kb: Some(12),
                ..Default::default()
            },
            IterationResourceUsage {
                cpu_time_ms: Some(7),
                peak_memory_kb: Some(18),
                ..Default::default()
            },
        ]);

        let mut setup_calls = 0_u32;
        let mut teardown_calls = 0_u32;
        let report = run_closure_with_setup_teardown_with_monitor(
            spec,
            &mut monitor,
            || {
                setup_calls += 1;
                vec![1_u8, 2, 3]
            },
            |_fixture| Ok(()),
            |_fixture| {
                teardown_calls += 1;
            },
        )
        .unwrap();

        // Serialization is exercised to show report formatting does not
        // disturb the captured CPU figures.
        let _serialized = serde_json::to_string(&report).unwrap();

        assert_eq!(setup_calls, 1);
        assert_eq!(teardown_calls, 1);
        assert_eq!(monitor.started, 2);
        assert_eq!(report.cpu_total_ms(), Some(12));
        assert_eq!(report.cpu_median_ms(), Some(6));
    }

    #[test]
    fn single_iteration_cpu_median_matches_the_measured_iteration() {
        let spec = BenchSpec::new("single", 1, 0).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![IterationResourceUsage {
            cpu_time_ms: Some(42),
            peak_memory_kb: Some(24),
            ..Default::default()
        }]);

        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();

        assert_eq!(report.samples[0].cpu_time_ms, Some(42));
        assert_eq!(report.cpu_total_ms(), Some(42));
        assert_eq!(report.cpu_median_ms(), Some(42));
    }

    #[test]
    fn multiple_iterations_export_the_median_cpu_sample() {
        // Samples arrive unsorted (19, 7, 11); the median of the three is 11.
        let spec = BenchSpec::new("median", 3, 0).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(19),
                peak_memory_kb: Some(10),
                ..Default::default()
            },
            IterationResourceUsage {
                cpu_time_ms: Some(7),
                peak_memory_kb: Some(30),
                ..Default::default()
            },
            IterationResourceUsage {
                cpu_time_ms: Some(11),
                peak_memory_kb: Some(20),
                ..Default::default()
            },
        ]);

        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();

        assert_eq!(report.cpu_median_ms(), Some(11));
        assert_eq!(report.cpu_total_ms(), Some(37));
    }

    #[test]
    fn peak_memory_excludes_harness_baseline_overhead() {
        let spec = BenchSpec::new("memory", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(3),
                peak_memory_kb: Some(48),
                process_peak_memory_kb: Some(1_048),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(4),
                peak_memory_kb: Some(96),
                process_peak_memory_kb: Some(1_096),
            },
        ]);

        let report = run_closure_with_setup_teardown_with_monitor(
            spec,
            &mut monitor,
            || vec![0_u8; 1024],
            |_fixture| Ok(()),
            |_fixture| {},
        )
        .unwrap();

        assert_eq!(
            report
                .samples
                .iter()
                .map(|sample| sample.peak_memory_kb)
                .collect::<Vec<_>>(),
            vec![Some(48), Some(96)]
        );
        assert_eq!(report.peak_memory_kb(), Some(96));
        assert_eq!(report.peak_memory_growth_kb(), report.peak_memory_kb());
        assert_eq!(report.process_peak_memory_kb(), Some(1_096));
    }

    #[test]
    fn memory_peak_sampler_uses_the_first_post_startup_sample_as_its_baseline() {
        use std::collections::VecDeque;
        use std::sync::{Arc, Mutex};

        // The expected growth of 40 (140 - 100) implies the first read (80)
        // is consumed during sampler startup and 100 becomes the baseline.
        let samples = Arc::new(Mutex::new(VecDeque::from([
            Some(80_u64),
            Some(100_u64),
            Some(140_u64),
            Some(120_u64),
        ])));
        let reader_samples = Arc::clone(&samples);
        let reader = Arc::new(move || {
            reader_samples
                .lock()
                .expect("sample queue")
                .pop_front()
                .unwrap_or(Some(120))
        });

        let sampler = MemoryPeakSampler::start_with_reader(reader).expect("sampler");
        let peak = sampler.stop().expect("peak memory");

        assert_eq!(
            peak,
            ProcessMemoryPeak {
                growth_kb: 40,
                process_peak_kb: 140,
            }
        );
    }

    #[test]
    fn run_with_setup_calls_setup_once() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static RUN_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 5, 2).unwrap();
        let report = run_closure_with_setup(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                RUN_COUNT.fetch_add(1, Ordering::SeqCst);
                std::hint::black_box(data.len());
                Ok(())
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1); // Setup called once
        assert_eq!(RUN_COUNT.load(Ordering::SeqCst), 7); // 2 warmup + 5 iterations
        assert_eq!(report.samples.len(), 5);
    }

    #[test]
    fn run_with_setup_per_iter_calls_setup_each_time() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_per_iter(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                std::hint::black_box(data);
                Ok(())
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 4); // 1 warmup + 3 iterations
        assert_eq!(report.samples.len(), 3);
    }

    #[test]
    fn run_with_setup_teardown_calls_both() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static TEARDOWN_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                "resource"
            },
            |_resource| Ok(()),
            |_resource| {
                TEARDOWN_COUNT.fetch_add(1, Ordering::SeqCst);
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(TEARDOWN_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(report.samples.len(), 3);
    }

    #[test]
    fn bench_report_serializes_exact_harness_timeline() {
        // 1 warmup + 2 measured: the serialized timeline must list the
        // phases in harness execution order.
        let spec = BenchSpec::new("timeline", 2, 1).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || {
                std::thread::sleep(Duration::from_millis(1));
                "resource"
            },
            |_resource| {
                std::thread::sleep(Duration::from_millis(1));
                Ok(())
            },
            |_resource| {
                std::thread::sleep(Duration::from_millis(1));
            },
        )
        .unwrap();

        let json = serde_json::to_value(&report).unwrap();
        assert_eq!(json["timeline"][0]["phase"], "setup");
        assert_eq!(json["timeline"][1]["phase"], "warmup-benchmark");
        assert_eq!(json["timeline"][2]["phase"], "measured-benchmark");
        assert_eq!(json["timeline"][3]["phase"], "measured-benchmark");
        assert_eq!(json["timeline"][4]["phase"], "teardown");
    }
}
1974}