Skip to main content

mobench_sdk/
timing.rs

1//! Lightweight benchmarking harness for mobile platforms.
2//!
3//! This module provides the core timing infrastructure for the mobench ecosystem.
4//! It was previously a separate crate (`mobench-runner`) but has been consolidated
5//! into `mobench-sdk` for a simpler dependency graph.
6//!
7//! The module is designed to be minimal and portable, with no platform-specific
8//! dependencies, making it suitable for compilation to Android and iOS targets.
9//!
10//! ## Overview
11//!
12//! The timing module executes benchmark functions with:
13//! - Configurable warmup iterations
14//! - Precise nanosecond-resolution timing
15//! - Simple, serializable results
16//!
17//! ## Usage
18//!
19//! Most users should use this via the higher-level [`crate::run_benchmark`] function
20//! or [`crate::BenchmarkBuilder`]. Direct usage is for custom integrations:
21//!
22//! ```
23//! use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
24//!
25//! // Define a benchmark specification
26//! let spec = BenchSpec::new("my_benchmark", 100, 10)?;
27//!
28//! // Run the benchmark
29//! let report = run_closure(spec, || {
30//!     // Your benchmark code
31//!     let sum: u64 = (0..1000).sum();
32//!     std::hint::black_box(sum);
33//!     Ok(())
34//! })?;
35//!
36//! // Analyze results
37//! let mean_ns = report.samples.iter()
38//!     .map(|s| s.duration_ns)
39//!     .sum::<u64>() / report.samples.len() as u64;
40//!
41//! println!("Mean: {} ns", mean_ns);
42//! # Ok::<(), TimingError>(())
43//! ```
44//!
45//! ## Types
46//!
47//! | Type | Description |
48//! |------|-------------|
49//! | [`BenchSpec`] | Benchmark configuration (name, iterations, warmup) |
50//! | [`BenchSample`] | Single timing measurement in nanoseconds |
51//! | [`BenchReport`] | Complete results with all samples |
52//! | [`TimingError`] | Error conditions during benchmarking |
53//!
54//! ## Feature Flags
55//!
56//! This module is always available. When using `mobench-sdk` with default features,
57//! you also get build automation and template generation. For minimal binary size
58//! (e.g., on mobile targets), use the `runner-only` feature:
59//!
60//! ```toml
61//! [dependencies]
62//! mobench-sdk = { version = "0.1", default-features = false, features = ["runner-only"] }
63//! ```
64
65use serde::{Deserialize, Serialize};
66use std::cell::RefCell;
67use std::sync::{
68    Arc,
69    atomic::{AtomicBool, AtomicU64, Ordering},
70    mpsc,
71};
72use std::thread::{self, JoinHandle};
73use std::time::{Duration, Instant};
74use thiserror::Error;
75
76/// Benchmark specification defining what and how to benchmark.
77///
78/// Contains the benchmark name, number of measurement iterations, and
79/// warmup iterations to perform before measuring.
80///
81/// # Example
82///
83/// ```
84/// use mobench_sdk::timing::BenchSpec;
85///
86/// // Create a spec for 100 iterations with 10 warmup runs
87/// let spec = BenchSpec::new("sorting_benchmark", 100, 10)?;
88///
89/// assert_eq!(spec.name, "sorting_benchmark");
90/// assert_eq!(spec.iterations, 100);
91/// assert_eq!(spec.warmup, 10);
92/// # Ok::<(), mobench_sdk::timing::TimingError>(())
93/// ```
94///
95/// # Serialization
96///
97/// `BenchSpec` implements `Serialize` and `Deserialize` for JSON persistence:
98///
99/// ```
100/// use mobench_sdk::timing::BenchSpec;
101///
102/// let spec = BenchSpec {
103///     name: "my_bench".to_string(),
104///     iterations: 50,
105///     warmup: 5,
106/// };
107///
108/// let json = serde_json::to_string(&spec)?;
109/// let restored: BenchSpec = serde_json::from_str(&json)?;
110///
111/// assert_eq!(spec.name, restored.name);
112/// # Ok::<(), serde_json::Error>(())
113/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSpec {
    /// Name of the benchmark, typically the fully-qualified function name.
    ///
    /// Examples: `"my_crate::fibonacci"`, `"sorting_benchmark"`
    pub name: String,

    /// Number of iterations to measure.
    ///
    /// Each iteration produces one [`BenchSample`]. Must be greater than zero;
    /// [`BenchSpec::new`] enforces this, but direct struct-literal
    /// construction does not (the runner re-validates before executing).
    pub iterations: u32,

    /// Number of warmup iterations before measurement.
    ///
    /// Warmup iterations are not recorded. They allow CPU caches to warm
    /// and any JIT compilation to complete. Can be zero.
    pub warmup: u32,
}
132
133impl BenchSpec {
134    /// Creates a new benchmark specification.
135    ///
136    /// # Arguments
137    ///
138    /// * `name` - Name identifier for the benchmark
139    /// * `iterations` - Number of measured iterations (must be > 0)
140    /// * `warmup` - Number of warmup iterations (can be 0)
141    ///
142    /// # Errors
143    ///
144    /// Returns [`TimingError::NoIterations`] if `iterations` is zero.
145    ///
146    /// # Example
147    ///
148    /// ```
149    /// use mobench_sdk::timing::BenchSpec;
150    ///
151    /// let spec = BenchSpec::new("test", 100, 10)?;
152    /// assert_eq!(spec.iterations, 100);
153    ///
154    /// // Zero iterations is an error
155    /// let err = BenchSpec::new("test", 0, 10);
156    /// assert!(err.is_err());
157    /// # Ok::<(), mobench_sdk::timing::TimingError>(())
158    /// ```
159    pub fn new(name: impl Into<String>, iterations: u32, warmup: u32) -> Result<Self, TimingError> {
160        if iterations == 0 {
161            return Err(TimingError::NoIterations { count: iterations });
162        }
163
164        Ok(Self {
165            name: name.into(),
166            iterations,
167            warmup,
168        })
169    }
170}
171
172/// A single timing sample from a benchmark iteration.
173///
174/// Contains the elapsed time in nanoseconds for one execution of the
175/// benchmark function.
176///
177/// # Example
178///
179/// ```
180/// use mobench_sdk::timing::BenchSample;
181///
182/// let sample = BenchSample {
183///     duration_ns: 1_500_000,
184///     ..Default::default()
185/// };
186///
187/// // Convert to milliseconds
188/// let ms = sample.duration_ns as f64 / 1_000_000.0;
189/// assert_eq!(ms, 1.5);
190/// ```
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct BenchSample {
    /// Duration of the iteration in nanoseconds.
    ///
    /// Measured using [`std::time::Instant`] for monotonic, high-resolution timing.
    pub duration_ns: u64,

    /// CPU time consumed by the measured iteration in milliseconds.
    ///
    /// This is captured around the measured benchmark closure only and excludes
    /// warmup, setup, teardown, and report generation overhead. `None` when the
    /// platform does not expose process CPU time (non-Unix targets).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_time_ms: Option<u64>,

    /// Peak memory growth during the measured iteration in kilobytes.
    ///
    /// Values are baseline-adjusted immediately before the measured closure
    /// enters so harness footprint is not counted. `None` when memory sampling
    /// is unavailable on this platform or the measured growth was zero.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub peak_memory_kb: Option<u64>,
}
212
213impl BenchSample {
214    fn from_measurement(duration: Duration, resources: IterationResourceUsage) -> Self {
215        Self {
216            duration_ns: duration.as_nanos() as u64,
217            cpu_time_ms: resources.cpu_time_ms,
218            peak_memory_kb: resources.peak_memory_kb,
219        }
220    }
221}
222
223/// Complete benchmark report with all timing samples.
224///
225/// Contains the original specification and all collected samples.
226/// Can be serialized to JSON for storage or transmission.
227///
228/// # Example
229///
230/// ```
231/// use mobench_sdk::timing::{BenchSpec, run_closure};
232///
233/// let spec = BenchSpec::new("example", 50, 5)?;
234/// let report = run_closure(spec, || {
235///     std::hint::black_box(42);
236///     Ok(())
237/// })?;
238///
239/// // Calculate statistics
240/// let samples: Vec<u64> = report.samples.iter()
241///     .map(|s| s.duration_ns)
242///     .collect();
243///
244/// let min = samples.iter().min().unwrap();
245/// let max = samples.iter().max().unwrap();
246/// let mean = samples.iter().sum::<u64>() / samples.len() as u64;
247///
248/// println!("Min: {} ns, Max: {} ns, Mean: {} ns", min, max, mean);
249/// # Ok::<(), mobench_sdk::timing::TimingError>(())
250/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchReport {
    /// The specification used for this benchmark run.
    pub spec: BenchSpec,

    /// All collected timing samples.
    ///
    /// The length equals `spec.iterations`. Samples are in execution order.
    pub samples: Vec<BenchSample>,

    /// Optional semantic phase timings captured during measured iterations.
    ///
    /// Populated by [`profile_phase`] calls made inside the benchmark closure;
    /// empty when the benchmark records no phases.
    pub phases: Vec<SemanticPhase>,

    /// Exact harness timeline spans in execution order.
    ///
    /// Covers both warmup and measured iterations, each span tagged with its
    /// phase label and iteration index.
    pub timeline: Vec<HarnessTimelineSpan>,
}
267
/// One contiguous span of harness activity, expressed as nanosecond offsets
/// from the start of the benchmark run.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct HarnessTimelineSpan {
    /// Harness phase label (e.g. `"warmup-benchmark"`, `"measured-benchmark"`).
    pub phase: String,
    /// Span start, in nanoseconds since the harness time origin.
    pub start_offset_ns: u64,
    /// Span end, in nanoseconds since the harness time origin.
    pub end_offset_ns: u64,
    /// Iteration index the span belongs to, if it covers a single iteration.
    pub iteration: Option<u32>,
}
275
276impl BenchReport {
277    /// Returns the mean (average) duration in nanoseconds.
278    #[must_use]
279    pub fn mean_ns(&self) -> f64 {
280        if self.samples.is_empty() {
281            return 0.0;
282        }
283        let sum: u64 = self.samples.iter().map(|s| s.duration_ns).sum();
284        sum as f64 / self.samples.len() as f64
285    }
286
287    /// Returns the median duration in nanoseconds.
288    #[must_use]
289    pub fn median_ns(&self) -> f64 {
290        if self.samples.is_empty() {
291            return 0.0;
292        }
293        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
294        sorted.sort_unstable();
295        let len = sorted.len();
296        if len % 2 == 0 {
297            (sorted[len / 2 - 1] + sorted[len / 2]) as f64 / 2.0
298        } else {
299            sorted[len / 2] as f64
300        }
301    }
302
303    /// Returns the standard deviation in nanoseconds (sample std dev, n-1).
304    #[must_use]
305    pub fn std_dev_ns(&self) -> f64 {
306        if self.samples.len() < 2 {
307            return 0.0;
308        }
309        let mean = self.mean_ns();
310        let variance: f64 = self
311            .samples
312            .iter()
313            .map(|s| {
314                let diff = s.duration_ns as f64 - mean;
315                diff * diff
316            })
317            .sum::<f64>()
318            / (self.samples.len() - 1) as f64;
319        variance.sqrt()
320    }
321
322    /// Returns the given percentile (0-100) in nanoseconds.
323    #[must_use]
324    pub fn percentile_ns(&self, p: f64) -> f64 {
325        if self.samples.is_empty() {
326            return 0.0;
327        }
328        let mut sorted: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
329        sorted.sort_unstable();
330        let p = p.clamp(0.0, 100.0) / 100.0;
331        let index = (p * (sorted.len() - 1) as f64).round() as usize;
332        sorted[index.min(sorted.len() - 1)] as f64
333    }
334
335    /// Returns the minimum duration in nanoseconds.
336    #[must_use]
337    pub fn min_ns(&self) -> u64 {
338        self.samples
339            .iter()
340            .map(|s| s.duration_ns)
341            .min()
342            .unwrap_or(0)
343    }
344
345    /// Returns the maximum duration in nanoseconds.
346    #[must_use]
347    pub fn max_ns(&self) -> u64 {
348        self.samples
349            .iter()
350            .map(|s| s.duration_ns)
351            .max()
352            .unwrap_or(0)
353    }
354
355    /// Returns the total measured CPU time in milliseconds across all iterations.
356    #[must_use]
357    pub fn cpu_total_ms(&self) -> Option<u64> {
358        let values = self
359            .samples
360            .iter()
361            .filter_map(|sample| sample.cpu_time_ms)
362            .collect::<Vec<_>>();
363        if values.is_empty() {
364            return None;
365        }
366
367        let total = values
368            .iter()
369            .fold(0_u128, |sum, value| sum.saturating_add(u128::from(*value)));
370        Some(total.min(u128::from(u64::MAX)) as u64)
371    }
372
373    /// Returns the median measured CPU time in milliseconds across all iterations.
374    #[must_use]
375    pub fn cpu_median_ms(&self) -> Option<u64> {
376        let mut values = self
377            .samples
378            .iter()
379            .filter_map(|sample| sample.cpu_time_ms)
380            .collect::<Vec<_>>();
381        if values.is_empty() {
382            return None;
383        }
384
385        values.sort_unstable();
386        let len = values.len();
387        Some(if len % 2 == 0 {
388            let lower = u128::from(values[(len / 2) - 1]);
389            let upper = u128::from(values[len / 2]);
390            ((lower + upper) / 2) as u64
391        } else {
392            values[len / 2]
393        })
394    }
395
396    /// Returns the maximum baseline-adjusted peak memory growth in kilobytes.
397    #[must_use]
398    pub fn peak_memory_kb(&self) -> Option<u64> {
399        self.samples
400            .iter()
401            .filter_map(|sample| sample.peak_memory_kb)
402            .max()
403    }
404
405    /// Returns a statistical summary of the benchmark results.
406    #[must_use]
407    pub fn summary(&self) -> BenchSummary {
408        BenchSummary {
409            name: self.spec.name.clone(),
410            iterations: self.samples.len() as u32,
411            warmup: self.spec.warmup,
412            mean_ns: self.mean_ns(),
413            median_ns: self.median_ns(),
414            std_dev_ns: self.std_dev_ns(),
415            min_ns: self.min_ns(),
416            max_ns: self.max_ns(),
417            p95_ns: self.percentile_ns(95.0),
418            p99_ns: self.percentile_ns(99.0),
419        }
420    }
421}
422
/// Per-iteration resource usage reported by a [`ResourceMonitor`].
#[derive(Clone, Debug, Default)]
struct IterationResourceUsage {
    /// CPU time consumed during the iteration, in milliseconds, when available.
    cpu_time_ms: Option<u64>,
    /// Baseline-adjusted peak memory growth in kilobytes, when available.
    peak_memory_kb: Option<u64>,
}
428
/// Converts an `Instant` into a nanosecond offset from `origin`, saturating
/// at `u64::MAX` instead of wrapping.
fn instant_offset_ns(origin: Instant, instant: Instant) -> u64 {
    let nanos = instant.duration_since(origin).as_nanos();
    u64::try_from(nanos).unwrap_or(u64::MAX)
}
435
436fn push_timeline_span(
437    timeline: &mut Vec<HarnessTimelineSpan>,
438    origin: Instant,
439    phase: &str,
440    started_at: Instant,
441    ended_at: Instant,
442    iteration: Option<u32>,
443) {
444    timeline.push(HarnessTimelineSpan {
445        phase: phase.to_string(),
446        start_offset_ns: instant_offset_ns(origin, started_at),
447        end_offset_ns: instant_offset_ns(origin, ended_at),
448        iteration,
449    });
450}
451
/// Statistical summary of benchmark results.
///
/// Produced by [`BenchReport::summary`]; all duration statistics are derived
/// from the recorded wall-clock samples.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSummary {
    /// Name of the benchmark.
    pub name: String,
    /// Number of measured iterations (length of the sample set).
    pub iterations: u32,
    /// Number of warmup iterations.
    pub warmup: u32,
    /// Mean duration in nanoseconds.
    pub mean_ns: f64,
    /// Median duration in nanoseconds.
    pub median_ns: f64,
    /// Standard deviation in nanoseconds (sample std dev, n-1).
    pub std_dev_ns: f64,
    /// Minimum duration in nanoseconds.
    pub min_ns: u64,
    /// Maximum duration in nanoseconds.
    pub max_ns: u64,
    /// 95th percentile in nanoseconds.
    pub p95_ns: f64,
    /// 99th percentile in nanoseconds.
    pub p99_ns: f64,
}
476
/// Flat semantic phase timing captured during a benchmark run.
///
/// Durations for phases sharing the same name are aggregated across all
/// measured iterations.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticPhase {
    /// Phase label supplied to [`profile_phase`].
    pub name: String,
    /// Total time spent in this phase, in nanoseconds, summed over iterations.
    pub duration_ns: u64,
}
483
/// Thread-local accumulator backing [`profile_phase`].
#[derive(Default)]
struct SemanticPhaseCollector {
    /// True only while the measurement loop runs; phases recorded outside
    /// measurement (warmup, setup) are ignored.
    enabled: bool,
    /// Current nesting depth of active `profile_phase` calls; only the
    /// top-level phase of a nested stack is recorded.
    depth: usize,
    /// Aggregated phase timings, one entry per distinct phase name.
    phases: Vec<SemanticPhase>,
}
490
491impl SemanticPhaseCollector {
492    fn reset(&mut self) {
493        self.enabled = false;
494        self.depth = 0;
495        self.phases.clear();
496    }
497
498    fn begin_measurement(&mut self) {
499        self.reset();
500        self.enabled = true;
501    }
502
503    fn finish(&mut self) -> Vec<SemanticPhase> {
504        self.enabled = false;
505        self.depth = 0;
506        std::mem::take(&mut self.phases)
507    }
508
509    fn enter_phase(&mut self) -> Option<bool> {
510        if !self.enabled {
511            return None;
512        }
513        let top_level = self.depth == 0;
514        self.depth += 1;
515        Some(top_level)
516    }
517
518    fn exit_phase(&mut self, name: &str, top_level: bool, elapsed: Duration) {
519        self.depth = self.depth.saturating_sub(1);
520        if !self.enabled || !top_level {
521            return;
522        }
523
524        let duration_ns = elapsed.as_nanos().min(u128::from(u64::MAX)) as u64;
525        if let Some(phase) = self.phases.iter_mut().find(|phase| phase.name == name) {
526            phase.duration_ns = phase.duration_ns.saturating_add(duration_ns);
527        } else {
528            self.phases.push(SemanticPhase {
529                name: name.to_string(),
530                duration_ns,
531            });
532        }
533    }
534}
535
thread_local! {
    // Per-thread accumulator behind `profile_phase`. Collection is only
    // observed on the thread running the measurement loop; `profile_phase`
    // calls made on other threads land in their own (disabled) collectors.
    static SEMANTIC_PHASE_COLLECTOR: RefCell<SemanticPhaseCollector> =
        RefCell::new(SemanticPhaseCollector::default());
}
540
/// RAII guard that reports a phase's elapsed time to the thread-local
/// collector when dropped.
struct SemanticPhaseGuard {
    /// Phase label; empty when the guard is inert.
    name: String,
    /// Start time of the phase; `None` makes the guard a no-op on drop.
    started_at: Option<Instant>,
    /// Whether this phase was entered at nesting depth zero.
    top_level: bool,
}
546
547impl Drop for SemanticPhaseGuard {
548    fn drop(&mut self) {
549        let Some(started_at) = self.started_at else {
550            return;
551        };
552
553        let elapsed = started_at.elapsed();
554        SEMANTIC_PHASE_COLLECTOR.with(|collector| {
555            collector
556                .borrow_mut()
557                .exit_phase(&self.name, self.top_level, elapsed);
558        });
559    }
560}
561
/// Clears any stale thread-local phase state before a benchmark run starts.
fn reset_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().reset());
}
565
/// Enables phase collection for the measurement loop (warmup is excluded).
fn begin_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().begin_measurement());
}
569
/// Disables collection and returns all phases aggregated during measurement.
fn finish_semantic_phase_collection() -> Vec<SemanticPhase> {
    SEMANTIC_PHASE_COLLECTOR.with(|collector| collector.borrow_mut().finish())
}
573
/// Abstraction over per-iteration resource measurement (CPU time, memory).
///
/// Implementations hand out an opaque token at iteration start and turn it
/// back into usage figures at iteration end, keeping start/finish state
/// paired by construction.
trait ResourceMonitor {
    /// Opaque state captured by `start` and consumed by `finish`.
    type Token;

    /// Called immediately before the measured closure runs.
    fn start(&mut self) -> Self::Token;

    /// Called after the measured closure returns; converts the start token
    /// into per-iteration usage figures.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage;
}
581
/// Default [`ResourceMonitor`]: process CPU time plus a background memory sampler.
#[derive(Default)]
struct DefaultResourceMonitor;
584
/// Snapshot of cumulative process CPU time, split into user and system parts.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ProcessCpuTimeSnapshot {
    /// User-mode CPU time in nanoseconds.
    user_ns: u64,
    /// Kernel-mode CPU time in nanoseconds.
    system_ns: u64,
}
590
impl ProcessCpuTimeSnapshot {
    /// Builds a snapshot from `getrusage` user/system timevals.
    ///
    /// Returns `None` if either timeval cannot be represented as unsigned
    /// nanoseconds (e.g. negative fields).
    #[cfg(unix)]
    fn from_rusage_timevals(user: libc::timeval, system: libc::timeval) -> Option<Self> {
        Some(Self {
            user_ns: timeval_to_ns(user)?,
            system_ns: timeval_to_ns(system)?,
        })
    }

    /// Combined user + system CPU time in nanoseconds, saturating on overflow.
    fn total_ns(self) -> u64 {
        self.user_ns.saturating_add(self.system_ns)
    }
}
604
/// Start-of-iteration state held by [`DefaultResourceMonitor`].
struct DefaultResourceToken {
    /// CPU-time snapshot taken at iteration start, when the platform provides one.
    cpu_time_start: Option<ProcessCpuTimeSnapshot>,
    /// Background memory sampler running for the duration of the iteration.
    memory_sampler: Option<MemoryPeakSampler>,
}
609
impl ResourceMonitor for DefaultResourceMonitor {
    type Token = DefaultResourceToken;

    /// Snapshots process CPU time and launches the background memory sampler.
    fn start(&mut self) -> Self::Token {
        Self::Token {
            cpu_time_start: current_process_cpu_time(),
            memory_sampler: MemoryPeakSampler::start(),
        }
    }

    /// Computes the CPU-time delta and stops the memory sampler.
    fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
        // Both snapshots must exist to compute a delta; otherwise report None.
        let cpu_time_ms = token
            .cpu_time_start
            .zip(current_process_cpu_time())
            .and_then(|(start, end)| process_cpu_delta_ms(start, end));

        IterationResourceUsage {
            cpu_time_ms,
            // Zero growth is filtered out and reported as None.
            peak_memory_kb: token
                .memory_sampler
                .and_then(MemoryPeakSampler::stop)
                .filter(|value| *value > 0),
        }
    }
}
635
/// Converts nanoseconds to milliseconds, rounding half up; arithmetic is done
/// in u128 so the rounding bias cannot overflow.
fn round_ns_to_ms(ns: u64) -> u64 {
    let biased = u128::from(ns) + 500_000;
    (biased / 1_000_000) as u64
}
639
640#[cfg(unix)]
641fn process_cpu_delta_ms(start: ProcessCpuTimeSnapshot, end: ProcessCpuTimeSnapshot) -> Option<u64> {
642    Some(round_ns_to_ms(
643        end.total_ns().checked_sub(start.total_ns())?,
644    ))
645}
646
/// CPU-time deltas are unavailable off-Unix; always reports `None`.
#[cfg(not(unix))]
fn process_cpu_delta_ms(
    _start: ProcessCpuTimeSnapshot,
    _end: ProcessCpuTimeSnapshot,
) -> Option<u64> {
    None
}
654
/// Converts a `libc::timeval` into nanoseconds.
///
/// Returns `None` for negative seconds/microseconds (unrepresentable
/// unsigned); saturates instead of overflowing on huge values.
#[cfg(unix)]
fn timeval_to_ns(value: libc::timeval) -> Option<u64> {
    let secs = u64::try_from(value.tv_sec).ok()?;
    let micros = u64::try_from(value.tv_usec).ok()?;
    Some(
        secs.saturating_mul(1_000_000_000)
            .saturating_add(micros.saturating_mul(1_000)),
    )
}
664
/// Reads the current process's cumulative CPU time via `getrusage(2)`.
///
/// Returns `None` if the syscall fails or the reported times cannot be
/// converted to nanoseconds.
#[cfg(unix)]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::uninit();
    // SAFETY: `usage` is valid writable memory of the correct type;
    // getrusage fully initializes it when it returns 0.
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }

    // SAFETY: rc == 0 above guarantees the kernel populated the struct.
    let usage = unsafe { usage.assume_init() };
    ProcessCpuTimeSnapshot::from_rusage_timevals(usage.ru_utime, usage.ru_stime)
}
676
/// CPU-time snapshots are unsupported off-Unix; always reports `None`.
#[cfg(not(unix))]
fn current_process_cpu_time() -> Option<ProcessCpuTimeSnapshot> {
    None
}
681
/// Polling period of the background memory-sampler thread.
const MEMORY_SAMPLER_INTERVAL: Duration = Duration::from_millis(1);
/// Injectable source of current resident-memory readings, in kilobytes.
type MemoryReader = Arc<dyn Fn() -> Option<u64> + Send + Sync + 'static>;
684
/// Background sampler tracking the peak resident memory observed while a
/// measured iteration runs.
struct MemoryPeakSampler {
    /// Resident memory (KB) recorded just before the measured closure starts.
    baseline_kb: u64,
    /// Signals the sampler thread to stop polling.
    stop_flag: Arc<AtomicBool>,
    /// Highest resident-memory reading (KB) seen so far.
    peak_kb: Arc<AtomicU64>,
    /// Handle used to join the sampler thread on stop.
    handle: JoinHandle<()>,
}
691
impl MemoryPeakSampler {
    /// Starts a sampler backed by the platform memory reader.
    ///
    /// Returns `None` when the sampler thread cannot be spawned or memory
    /// readings are unavailable on this platform.
    fn start() -> Option<Self> {
        Self::start_with_reader(Arc::new(|| current_process_memory_kb()))
    }

    /// Starts a sampler with an injectable memory reader.
    ///
    /// Startup handshake: (1) the sampler thread warms up its own state and
    /// signals readiness, (2) this thread captures the baseline reading,
    /// (3) the baseline is handed to the sampler, which then polls every
    /// [`MEMORY_SAMPLER_INTERVAL`] until the stop flag is raised.
    fn start_with_reader(reader: MemoryReader) -> Option<Self> {
        let stop_flag = Arc::new(AtomicBool::new(false));
        let peak_kb = Arc::new(AtomicU64::new(0));
        // Rendezvous channels so neither side runs ahead of the handshake.
        let (ready_tx, ready_rx) = mpsc::sync_channel(1);
        let (baseline_tx, baseline_rx) = mpsc::sync_channel(1);
        let sampler_stop = Arc::clone(&stop_flag);
        let sampler_peak = Arc::clone(&peak_kb);
        let sampler_reader = Arc::clone(&reader);

        let handle = thread::Builder::new()
            .name("mobench-memory-sampler".to_string())
            .spawn(move || {
                // Touch the sampler thread's own stack and runtime state before the
                // benchmark baseline is captured so its overhead is not reported as
                // measured benchmark memory.
                let _ = sampler_reader();
                let _ = ready_tx.send(());

                // A `None` baseline means the main thread could not read
                // memory; the sampler simply exits.
                let Some(baseline_kb) = baseline_rx.recv().ok().flatten() else {
                    return;
                };
                sampler_peak.store(baseline_kb, Ordering::Release);

                while !sampler_stop.load(Ordering::Acquire) {
                    if let Some(current_kb) = sampler_reader() {
                        update_atomic_max(&sampler_peak, current_kb);
                    }
                    thread::sleep(MEMORY_SAMPLER_INTERVAL);
                }

                // One final reading after the stop signal so growth right at
                // the end of the iteration is not missed.
                if let Some(current_kb) = sampler_reader() {
                    update_atomic_max(&sampler_peak, current_kb);
                }
            })
            .ok()?;

        // The ready channel only errors if the sampler thread died early.
        if ready_rx.recv().is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        let baseline_kb = match reader() {
            Some(value) => value,
            None => {
                // Tell the sampler there is no baseline so it can exit, then join.
                let _ = baseline_tx.send(None);
                stop_flag.store(true, Ordering::Release);
                let _ = handle.join();
                return None;
            }
        };
        if baseline_tx.send(Some(baseline_kb)).is_err() {
            stop_flag.store(true, Ordering::Release);
            let _ = handle.join();
            return None;
        }

        Some(Self {
            baseline_kb,
            stop_flag,
            peak_kb,
            handle,
        })
    }

    /// Stops the sampler thread and returns the peak growth over the
    /// baseline in kilobytes (zero when memory never rose above it).
    fn stop(self) -> Option<u64> {
        self.stop_flag.store(true, Ordering::Release);
        // Joining synchronizes with all of the sampler's peak updates.
        let _ = self.handle.join();
        let peak_kb = self.peak_kb.load(Ordering::Acquire);
        Some(peak_kb.saturating_sub(self.baseline_kb))
    }
}
769
/// Atomically raises `target` to `value` if `value` is larger.
///
/// Relaxed ordering suffices here: the final peak is only read after the
/// sampler thread has been joined, and the join provides the necessary
/// synchronization.
fn update_atomic_max(target: &AtomicU64, value: u64) {
    // `fetch_max` performs the compare-exchange loop inside the standard
    // library, replacing the previous hand-rolled CAS loop with identical
    // semantics.
    target.fetch_max(value, Ordering::Relaxed);
}
779
/// Resident set size of the current process in kilobytes (Linux/Android).
///
/// Parses the second field (resident pages) of `/proc/self/statm` and scales
/// it by the system page size.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn current_process_memory_kb() -> Option<u64> {
    let statm = std::fs::read_to_string("/proc/self/statm").ok()?;
    let resident_pages = statm
        .split_whitespace()
        .nth(1)
        .and_then(|value| value.parse::<u64>().ok())?;
    // SAFETY: sysconf with a valid name constant has no memory-safety
    // preconditions; failure is reported via a non-positive return value.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
    if page_size <= 0 {
        return None;
    }
    let page_size = u64::try_from(page_size).ok()?;
    Some(resident_pages.saturating_mul(page_size) / 1024)
}
794
/// Resident set size of the current process in kilobytes (macOS/iOS).
///
/// Queries the Mach kernel via `task_info(MACH_TASK_BASIC_INFO)`.
#[cfg(any(target_os = "ios", target_os = "macos"))]
fn current_process_memory_kb() -> Option<u64> {
    let mut info = std::mem::MaybeUninit::<libc::mach_task_basic_info_data_t>::uninit();
    let mut count = libc::MACH_TASK_BASIC_INFO_COUNT;
    #[allow(deprecated)]
    // SAFETY: `info` is sized for MACH_TASK_BASIC_INFO and `count` matches
    // that layout; the kernel fills the struct only on KERN_SUCCESS.
    let rc = unsafe {
        libc::task_info(
            libc::mach_task_self(),
            libc::MACH_TASK_BASIC_INFO,
            info.as_mut_ptr().cast::<libc::integer_t>(),
            &mut count,
        )
    };
    if rc != libc::KERN_SUCCESS {
        return None;
    }

    // SAFETY: KERN_SUCCESS above guarantees the struct was initialized.
    let info = unsafe { info.assume_init() };
    Some((info.resident_size / 1024) as u64)
}
815
/// Memory sampling is unsupported on this platform; always reports `None`.
#[cfg(not(any(
    target_os = "android",
    target_os = "linux",
    target_os = "ios",
    target_os = "macos"
)))]
fn current_process_memory_kb() -> Option<u64> {
    None
}
825
826fn measure_iteration<M, F>(
827    monitor: &mut M,
828    f: F,
829) -> Result<(BenchSample, Instant, Instant), TimingError>
830where
831    M: ResourceMonitor,
832    F: FnOnce() -> Result<(), TimingError>,
833{
834    let token = monitor.start();
835    let started_at = Instant::now();
836    let result = f();
837    let ended_at = Instant::now();
838    let resources = monitor.finish(token);
839    result.map(|_| {
840        (
841            BenchSample::from_measurement(ended_at.duration_since(started_at), resources),
842            started_at,
843            ended_at,
844        )
845    })
846}
847
848/// Records a flat semantic phase when called inside an active benchmark measurement loop.
849///
850/// Phases are aggregated across measured iterations and ignored during warmup/setup.
851/// Nested phases are intentionally collapsed in v1 to keep the output flat.
852pub fn profile_phase<T>(name: &str, f: impl FnOnce() -> T) -> T {
853    let guard = SEMANTIC_PHASE_COLLECTOR.with(|collector| {
854        let mut collector = collector.borrow_mut();
855        match collector.enter_phase() {
856            Some(top_level) => SemanticPhaseGuard {
857                name: name.to_string(),
858                started_at: Some(Instant::now()),
859                top_level,
860            },
861            None => SemanticPhaseGuard {
862                name: String::new(),
863                started_at: None,
864                top_level: false,
865            },
866        }
867    });
868
869    let result = f();
870    drop(guard);
871    result
872}
873
874/// Errors that can occur during benchmark execution.
875///
876/// # Example
877///
878/// ```
879/// use mobench_sdk::timing::{BenchSpec, TimingError};
880///
881/// // Zero iterations produces an error
882/// let result = BenchSpec::new("test", 0, 10);
883/// assert!(matches!(result, Err(TimingError::NoIterations { .. })));
884/// ```
#[derive(Debug, Error)]
pub enum TimingError {
    /// The iteration count was zero or invalid.
    ///
    /// At least one iteration is required to produce a measurement.
    /// The error includes the actual value provided for diagnostic purposes.
    #[error("iterations must be greater than zero (got {count}). Minimum recommended: 10")]
    NoIterations {
        /// The invalid iteration count that was provided.
        count: u32,
    },

    /// The benchmark function failed during execution.
    ///
    /// Contains a human-readable description of the failure, surfaced via
    /// the `Display` implementation.
    #[error("benchmark function failed: {0}")]
    Execution(String),
}
903
904/// Runs a benchmark by executing a closure repeatedly.
905///
906/// This is the core benchmarking function. It:
907///
908/// 1. Executes the closure `spec.warmup` times without recording
909/// 2. Executes the closure `spec.iterations` times, recording each duration
910/// 3. Returns a [`BenchReport`] with all samples
911///
912/// # Arguments
913///
914/// * `spec` - Benchmark configuration specifying iterations and warmup
915/// * `f` - Closure to benchmark; must return `Result<(), TimingError>`
916///
917/// # Returns
918///
919/// A [`BenchReport`] containing all timing samples, or a [`TimingError`] if
920/// the benchmark fails.
921///
922/// # Example
923///
924/// ```
925/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
926///
927/// let spec = BenchSpec::new("sum_benchmark", 100, 10)?;
928///
929/// let report = run_closure(spec, || {
930///     let sum: u64 = (0..1000).sum();
931///     std::hint::black_box(sum);
932///     Ok(())
933/// })?;
934///
935/// assert_eq!(report.samples.len(), 100);
936///
937/// // Calculate mean duration
938/// let total_ns: u64 = report.samples.iter().map(|s| s.duration_ns).sum();
939/// let mean_ns = total_ns / report.samples.len() as u64;
940/// println!("Mean: {} ns", mean_ns);
941/// # Ok::<(), TimingError>(())
942/// ```
943///
944/// # Error Handling
945///
946/// If the closure returns an error, the benchmark stops immediately:
947///
948/// ```
949/// use mobench_sdk::timing::{BenchSpec, run_closure, TimingError};
950///
951/// let spec = BenchSpec::new("failing_bench", 100, 0)?;
952///
953/// let result = run_closure(spec, || {
954///     Err(TimingError::Execution("simulated failure".into()))
955/// });
956///
957/// assert!(result.is_err());
958/// # Ok::<(), TimingError>(())
959/// ```
960///
961/// # Timing Precision
962///
963/// Uses [`std::time::Instant`] for timing, which provides monotonic,
964/// nanosecond-resolution measurements on most platforms.
965pub fn run_closure<F>(spec: BenchSpec, mut f: F) -> Result<BenchReport, TimingError>
966where
967    F: FnMut() -> Result<(), TimingError>,
968{
969    let mut monitor = DefaultResourceMonitor;
970    run_closure_with_monitor(spec, &mut monitor, move || f())
971}
972
/// Core benchmark loop shared by all entry points.
///
/// Runs `spec.warmup` unrecorded iterations, then `spec.iterations` measured
/// iterations through `monitor`, collecting samples, semantic phases, and a
/// harness timeline.
///
/// # Errors
///
/// Returns [`TimingError::NoIterations`] for a zero iteration count (guards
/// against specs built via struct literal rather than [`BenchSpec::new`]),
/// or the first error produced by `f`.
fn run_closure_with_monitor<F, M>(
    spec: BenchSpec,
    monitor: &mut M,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    F: FnMut() -> Result<(), TimingError>,
    M: ResourceMonitor,
{
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }

    // Drop any phase state left over from a previous run on this thread.
    reset_semantic_phase_collection();
    let harness_origin = Instant::now();
    let mut timeline = Vec::new();

    // Warmup phase - not measured, but still recorded on the timeline.
    for iteration in 0..spec.warmup {
        let phase_start = Instant::now();
        f()?;
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "warmup-benchmark",
            phase_start,
            Instant::now(),
            Some(iteration),
        );
    }

    // Measurement phase: semantic phases are collected only from here on.
    begin_semantic_phase_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for iteration in 0..spec.iterations {
        let (sample, start, end) = match measure_iteration(monitor, || f()) {
            Ok(measurement) => measurement,
            Err(err) => {
                // Disable collection so a failed run cannot leak phase state
                // into a later run on the same thread.
                let _ = finish_semantic_phase_collection();
                return Err(err);
            }
        };
        samples.push(sample);
        push_timeline_span(
            &mut timeline,
            harness_origin,
            "measured-benchmark",
            start,
            end,
            Some(iteration),
        );
    }
    let phases = finish_semantic_phase_collection();

    Ok(BenchReport {
        spec,
        samples,
        phases,
        timeline,
    })
}
1036
1037/// Runs a benchmark with setup that executes once before all iterations.
1038///
1039/// The setup function is called once before timing begins, then the benchmark
1040/// runs multiple times using a reference to the setup result. This is useful
1041/// for expensive initialization that shouldn't be included in timing.
1042///
1043/// # Arguments
1044///
1045/// * `spec` - Benchmark configuration specifying iterations and warmup
1046/// * `setup` - Function that creates the input data (called once, not timed)
1047/// * `f` - Benchmark closure that receives a reference to setup result
1048///
1049/// # Example
1050///
1051/// ```ignore
1052/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup};
1053///
1054/// fn setup_data() -> Vec<u8> {
1055///     vec![0u8; 1_000_000]  // Expensive allocation not measured
1056/// }
1057///
1058/// let spec = BenchSpec::new("hash_benchmark", 100, 10)?;
1059/// let report = run_closure_with_setup(spec, setup_data, |data| {
1060///     std::hint::black_box(compute_hash(data));
1061///     Ok(())
1062/// })?;
1063/// ```
1064pub fn run_closure_with_setup<S, T, F>(
1065    spec: BenchSpec,
1066    setup: S,
1067    mut f: F,
1068) -> Result<BenchReport, TimingError>
1069where
1070    S: FnOnce() -> T,
1071    F: FnMut(&T) -> Result<(), TimingError>,
1072{
1073    let mut monitor = DefaultResourceMonitor;
1074    run_closure_with_setup_with_monitor(spec, &mut monitor, setup, move |input| f(input))
1075}
1076
1077fn run_closure_with_setup_with_monitor<S, T, F, M>(
1078    spec: BenchSpec,
1079    monitor: &mut M,
1080    setup: S,
1081    mut f: F,
1082) -> Result<BenchReport, TimingError>
1083where
1084    S: FnOnce() -> T,
1085    F: FnMut(&T) -> Result<(), TimingError>,
1086    M: ResourceMonitor,
1087{
1088    if spec.iterations == 0 {
1089        return Err(TimingError::NoIterations {
1090            count: spec.iterations,
1091        });
1092    }
1093
1094    reset_semantic_phase_collection();
1095    let harness_origin = Instant::now();
1096    let mut timeline = Vec::new();
1097
1098    // Setup phase - not timed
1099    let setup_start = Instant::now();
1100    let input = setup();
1101    push_timeline_span(
1102        &mut timeline,
1103        harness_origin,
1104        "setup",
1105        setup_start,
1106        Instant::now(),
1107        None,
1108    );
1109
1110    // Warmup phase - not recorded
1111    for iteration in 0..spec.warmup {
1112        let phase_start = Instant::now();
1113        f(&input)?;
1114        push_timeline_span(
1115            &mut timeline,
1116            harness_origin,
1117            "warmup-benchmark",
1118            phase_start,
1119            Instant::now(),
1120            Some(iteration),
1121        );
1122    }
1123
1124    // Measurement phase
1125    begin_semantic_phase_collection();
1126    let mut samples = Vec::with_capacity(spec.iterations as usize);
1127    for iteration in 0..spec.iterations {
1128        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
1129            Ok(measurement) => measurement,
1130            Err(err) => {
1131                let _ = finish_semantic_phase_collection();
1132                return Err(err);
1133            }
1134        };
1135        samples.push(sample);
1136        push_timeline_span(
1137            &mut timeline,
1138            harness_origin,
1139            "measured-benchmark",
1140            start,
1141            end,
1142            Some(iteration),
1143        );
1144    }
1145    let phases = finish_semantic_phase_collection();
1146
1147    Ok(BenchReport {
1148        spec,
1149        samples,
1150        phases,
1151        timeline,
1152    })
1153}
1154
1155/// Runs a benchmark with per-iteration setup.
1156///
1157/// Setup runs before each iteration and is not timed. The benchmark takes
1158/// ownership of the setup result, making this suitable for benchmarks that
1159/// mutate their input (e.g., sorting).
1160///
1161/// # Arguments
1162///
1163/// * `spec` - Benchmark configuration specifying iterations and warmup
1164/// * `setup` - Function that creates fresh input for each iteration (not timed)
1165/// * `f` - Benchmark closure that takes ownership of setup result
1166///
1167/// # Example
1168///
1169/// ```ignore
1170/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_per_iter};
1171///
1172/// fn generate_random_vec() -> Vec<i32> {
1173///     (0..1000).map(|_| rand::random()).collect()
1174/// }
1175///
1176/// let spec = BenchSpec::new("sort_benchmark", 100, 10)?;
1177/// let report = run_closure_with_setup_per_iter(spec, generate_random_vec, |mut data| {
1178///     data.sort();
1179///     std::hint::black_box(data);
1180///     Ok(())
1181/// })?;
1182/// ```
1183pub fn run_closure_with_setup_per_iter<S, T, F>(
1184    spec: BenchSpec,
1185    mut setup: S,
1186    mut f: F,
1187) -> Result<BenchReport, TimingError>
1188where
1189    S: FnMut() -> T,
1190    F: FnMut(T) -> Result<(), TimingError>,
1191{
1192    let mut monitor = DefaultResourceMonitor;
1193    run_closure_with_setup_per_iter_with_monitor(
1194        spec,
1195        &mut monitor,
1196        move || setup(),
1197        move |input| f(input),
1198    )
1199}
1200
1201fn run_closure_with_setup_per_iter_with_monitor<S, T, F, M>(
1202    spec: BenchSpec,
1203    monitor: &mut M,
1204    mut setup: S,
1205    mut f: F,
1206) -> Result<BenchReport, TimingError>
1207where
1208    S: FnMut() -> T,
1209    F: FnMut(T) -> Result<(), TimingError>,
1210    M: ResourceMonitor,
1211{
1212    if spec.iterations == 0 {
1213        return Err(TimingError::NoIterations {
1214            count: spec.iterations,
1215        });
1216    }
1217
1218    reset_semantic_phase_collection();
1219    let harness_origin = Instant::now();
1220    let mut timeline = Vec::new();
1221
1222    // Warmup phase
1223    for iteration in 0..spec.warmup {
1224        let setup_start = Instant::now();
1225        let input = setup();
1226        push_timeline_span(
1227            &mut timeline,
1228            harness_origin,
1229            "fixture-setup",
1230            setup_start,
1231            Instant::now(),
1232            Some(iteration),
1233        );
1234        let phase_start = Instant::now();
1235        f(input)?;
1236        push_timeline_span(
1237            &mut timeline,
1238            harness_origin,
1239            "warmup-benchmark",
1240            phase_start,
1241            Instant::now(),
1242            Some(iteration),
1243        );
1244    }
1245
1246    // Measurement phase
1247    begin_semantic_phase_collection();
1248    let mut samples = Vec::with_capacity(spec.iterations as usize);
1249    for iteration in 0..spec.iterations {
1250        let setup_start = Instant::now();
1251        let input = setup(); // Not timed
1252        push_timeline_span(
1253            &mut timeline,
1254            harness_origin,
1255            "fixture-setup",
1256            setup_start,
1257            Instant::now(),
1258            Some(iteration),
1259        );
1260
1261        let (sample, start, end) = match measure_iteration(monitor, || f(input)) {
1262            Ok(measurement) => measurement,
1263            Err(err) => {
1264                let _ = finish_semantic_phase_collection();
1265                return Err(err);
1266            }
1267        };
1268        samples.push(sample);
1269        push_timeline_span(
1270            &mut timeline,
1271            harness_origin,
1272            "measured-benchmark",
1273            start,
1274            end,
1275            Some(iteration),
1276        );
1277    }
1278    let phases = finish_semantic_phase_collection();
1279
1280    Ok(BenchReport {
1281        spec,
1282        samples,
1283        phases,
1284        timeline,
1285    })
1286}
1287
1288/// Runs a benchmark with setup and teardown.
1289///
1290/// Setup runs once before all iterations, teardown runs once after all
1291/// iterations complete. Neither is included in timing.
1292///
1293/// # Arguments
1294///
1295/// * `spec` - Benchmark configuration specifying iterations and warmup
1296/// * `setup` - Function that creates the input data (called once, not timed)
1297/// * `f` - Benchmark closure that receives a reference to setup result
1298/// * `teardown` - Function that cleans up the input (called once, not timed)
1299///
1300/// # Example
1301///
1302/// ```ignore
1303/// use mobench_sdk::timing::{BenchSpec, run_closure_with_setup_teardown};
1304///
1305/// fn setup_db() -> Database { Database::connect("test.db") }
1306/// fn cleanup_db(db: Database) { db.close(); std::fs::remove_file("test.db").ok(); }
1307///
1308/// let spec = BenchSpec::new("db_benchmark", 100, 10)?;
1309/// let report = run_closure_with_setup_teardown(
1310///     spec,
1311///     setup_db,
1312///     |db| { db.query("SELECT *"); Ok(()) },
1313///     cleanup_db,
1314/// )?;
1315/// ```
1316pub fn run_closure_with_setup_teardown<S, T, F, D>(
1317    spec: BenchSpec,
1318    setup: S,
1319    mut f: F,
1320    teardown: D,
1321) -> Result<BenchReport, TimingError>
1322where
1323    S: FnOnce() -> T,
1324    F: FnMut(&T) -> Result<(), TimingError>,
1325    D: FnOnce(T),
1326{
1327    let mut monitor = DefaultResourceMonitor;
1328    run_closure_with_setup_teardown_with_monitor(
1329        spec,
1330        &mut monitor,
1331        setup,
1332        move |input| f(input),
1333        teardown,
1334    )
1335}
1336
1337fn run_closure_with_setup_teardown_with_monitor<S, T, F, D, M>(
1338    spec: BenchSpec,
1339    monitor: &mut M,
1340    setup: S,
1341    mut f: F,
1342    teardown: D,
1343) -> Result<BenchReport, TimingError>
1344where
1345    S: FnOnce() -> T,
1346    F: FnMut(&T) -> Result<(), TimingError>,
1347    D: FnOnce(T),
1348    M: ResourceMonitor,
1349{
1350    if spec.iterations == 0 {
1351        return Err(TimingError::NoIterations {
1352            count: spec.iterations,
1353        });
1354    }
1355
1356    reset_semantic_phase_collection();
1357    let harness_origin = Instant::now();
1358    let mut timeline = Vec::new();
1359
1360    // Setup phase - not timed
1361    let setup_start = Instant::now();
1362    let input = setup();
1363    push_timeline_span(
1364        &mut timeline,
1365        harness_origin,
1366        "setup",
1367        setup_start,
1368        Instant::now(),
1369        None,
1370    );
1371
1372    // Warmup phase
1373    for iteration in 0..spec.warmup {
1374        let phase_start = Instant::now();
1375        f(&input)?;
1376        push_timeline_span(
1377            &mut timeline,
1378            harness_origin,
1379            "warmup-benchmark",
1380            phase_start,
1381            Instant::now(),
1382            Some(iteration),
1383        );
1384    }
1385
1386    // Measurement phase
1387    begin_semantic_phase_collection();
1388    let mut samples = Vec::with_capacity(spec.iterations as usize);
1389    for iteration in 0..spec.iterations {
1390        let (sample, start, end) = match measure_iteration(monitor, || f(&input)) {
1391            Ok(measurement) => measurement,
1392            Err(err) => {
1393                let _ = finish_semantic_phase_collection();
1394                return Err(err);
1395            }
1396        };
1397        samples.push(sample);
1398        push_timeline_span(
1399            &mut timeline,
1400            harness_origin,
1401            "measured-benchmark",
1402            start,
1403            end,
1404            Some(iteration),
1405        );
1406    }
1407    let phases = finish_semantic_phase_collection();
1408
1409    // Teardown phase - not timed
1410    let teardown_start = Instant::now();
1411    teardown(input);
1412    push_timeline_span(
1413        &mut timeline,
1414        harness_origin,
1415        "teardown",
1416        teardown_start,
1417        Instant::now(),
1418        None,
1419    );
1420
1421    Ok(BenchReport {
1422        spec,
1423        samples,
1424        phases,
1425        timeline,
1426    })
1427}
1428
#[cfg(test)]
mod tests {
    use super::*;

    // Test double for `ResourceMonitor`: replays a pre-seeded list of
    // per-iteration usage values and counts start/finish calls, so tests can
    // assert that resource capture runs only for measured iterations.
    #[derive(Default)]
    struct FakeResourceMonitor {
        samples: Vec<IterationResourceUsage>,
        started: usize,
        finished: usize,
    }

    impl FakeResourceMonitor {
        // Builds a monitor that will hand out `samples` in order, one per
        // measured iteration.
        fn new(samples: Vec<IterationResourceUsage>) -> Self {
            Self {
                samples,
                started: 0,
                finished: 0,
            }
        }
    }

    impl ResourceMonitor for FakeResourceMonitor {
        type Token = usize;

        // Token is simply the index of the sample to replay on `finish`.
        fn start(&mut self) -> Self::Token {
            let token = self.started;
            self.started += 1;
            // Panics if the harness tries to capture resources for more
            // iterations than the test seeded (i.e. warmup leaked through).
            assert!(
                token < self.samples.len(),
                "resource capture should only run for measured iterations"
            );
            token
        }

        fn finish(&mut self, token: Self::Token) -> IterationResourceUsage {
            self.finished += 1;
            self.samples
                .get(token)
                .cloned()
                .expect("resource usage for measured iteration")
        }
    }

    #[cfg(unix)]
    #[test]
    fn process_cpu_time_snapshot_sums_user_and_kernel_time() {
        // 1.25 s user + 0.75 s kernel should total exactly 2 s.
        let snapshot = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid snapshot");

        assert_eq!(snapshot.total_ns(), 2_000_000_000);
    }

    #[cfg(unix)]
    #[test]
    fn process_cpu_time_delta_ms_uses_user_and_kernel_time() {
        // Start totals 2.0 s (user + kernel).
        let start = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 250_000,
            },
            libc::timeval {
                tv_sec: 0,
                tv_usec: 750_000,
            },
        )
        .expect("valid start snapshot");
        // End totals 3.3006 s, so the delta is 1300.6 ms.
        let end = ProcessCpuTimeSnapshot::from_rusage_timevals(
            libc::timeval {
                tv_sec: 1,
                tv_usec: 900_000,
            },
            libc::timeval {
                tv_sec: 1,
                tv_usec: 400_600,
            },
        )
        .expect("valid end snapshot");

        // 1300.6 ms reported as 1_301 — presumably rounding to the nearest
        // millisecond; confirm against `process_cpu_delta_ms` if it changes.
        assert_eq!(process_cpu_delta_ms(start, end), Some(1_301));
    }

    // Sample count must equal `iterations`, excluding warmup runs.
    #[test]
    fn runs_benchmark_collects_requested_samples() {
        let spec = BenchSpec::new("noop", 3, 1).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();

        assert_eq!(report.samples.len(), 3);
        assert_eq!(report.spec.name, "noop");
        assert_eq!(report.spec.iterations, 3);
    }

    // Zero iterations is rejected at spec construction time.
    #[test]
    fn rejects_zero_iterations() {
        let result = BenchSpec::new("test", 0, 10);
        assert!(matches!(
            result,
            Err(TimingError::NoIterations { count: 0 })
        ));
    }

    // Warmup is optional: zero warmup still yields the full sample count.
    #[test]
    fn allows_zero_warmup() {
        let spec = BenchSpec::new("test", 5, 0).unwrap();
        assert_eq!(spec.warmup, 0);

        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(report.samples.len(), 5);
    }

    // Round-trips a fully-populated report through serde_json and checks
    // the fields survive intact.
    #[test]
    fn serializes_to_json() {
        let report = BenchReport {
            spec: BenchSpec::new("test", 10, 2).unwrap(),
            samples: vec![BenchSample {
                duration_ns: 1_000_000,
                cpu_time_ms: Some(42),
                peak_memory_kb: Some(512),
            }],
            phases: vec![SemanticPhase {
                name: "prove".to_string(),
                duration_ns: 1_000_000,
            }],
            timeline: vec![HarnessTimelineSpan {
                phase: "measured-benchmark".to_string(),
                start_offset_ns: 0,
                end_offset_ns: 1_000_000,
                iteration: Some(0),
            }],
        };

        let json = serde_json::to_string(&report).unwrap();
        let restored: BenchReport = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.spec.name, "test");
        assert_eq!(restored.samples.len(), 1);
        assert_eq!(restored.samples[0].cpu_time_ms, Some(42));
        assert_eq!(restored.samples[0].peak_memory_kb, Some(512));
        assert_eq!(restored.phases.len(), 1);
        assert_eq!(restored.phases[0].name, "prove");
        assert!(restored.phases[0].duration_ns > 0);
    }

    // Phases emitted during warmup must not appear in the final report;
    // only measured-iteration phases are collected.
    #[test]
    fn profile_phase_records_only_measured_iterations() {
        let spec = BenchSpec::new("semantic", 2, 1).unwrap();
        let mut call_index = 0u32;
        let report = run_closure(spec, || {
            // First call is the single warmup iteration; later calls are measured.
            let phase_name = if call_index == 0 {
                "warmup-only"
            } else {
                "prove"
            };
            call_index += 1;
            profile_phase(phase_name, || std::thread::sleep(Duration::from_millis(1)));
            Ok(())
        })
        .unwrap();

        assert!(
            !report
                .phases
                .iter()
                .any(|phase| phase.name == "warmup-only"),
            "warmup phases should not be recorded"
        );
        let prove = report
            .phases
            .iter()
            .find(|phase| phase.name == "prove")
            .expect("prove phase");
        assert!(prove.duration_ns > 0);
    }

    // Nested profile_phase calls collapse into the outer phase only —
    // the v1 report model is intentionally flat.
    #[test]
    fn profile_phase_keeps_the_v1_model_flat() {
        let spec = BenchSpec::new("semantic-flat", 1, 0).unwrap();
        let report = run_closure(spec, || {
            profile_phase("prove", || {
                std::thread::sleep(Duration::from_millis(1));
                profile_phase("inner", || std::thread::sleep(Duration::from_millis(1)));
            });
            Ok(())
        })
        .unwrap();

        assert!(report.phases.iter().any(|phase| phase.name == "prove"));
        assert!(
            !report.phases.iter().any(|phase| phase.name == "inner"),
            "nested phases should not create a second flat phase entry"
        );
    }

    // With 2 measured + 1 warmup iteration, the monitor must start/finish
    // exactly twice even though the closure runs three times.
    #[test]
    fn measured_cpu_excludes_warmup_iterations() {
        let spec = BenchSpec::new("cpu", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(11),
                peak_memory_kb: Some(32),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(17),
                peak_memory_kb: Some(64),
            },
        ]);
        let mut calls = 0_u32;

        let report = run_closure_with_monitor(spec, &mut monitor, || {
            calls += 1;
            Ok(())
        })
        .unwrap();

        assert_eq!(calls, 3);
        assert_eq!(monitor.started, 2);
        assert_eq!(monitor.finished, 2);
        assert_eq!(
            report
                .samples
                .iter()
                .map(|sample| sample.cpu_time_ms)
                .collect::<Vec<_>>(),
            vec![Some(11), Some(17)]
        );
        // 11 + 17 ms across the two measured iterations.
        assert_eq!(report.cpu_total_ms(), Some(28));
    }

    // Setup, teardown, and report serialization must not trigger resource
    // capture; only the 2 measured iterations do (5 + 7 = 12 ms total).
    #[test]
    fn measured_cpu_excludes_outer_harness_and_report_overhead() {
        let spec = BenchSpec::new("cpu-harness", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(5),
                peak_memory_kb: Some(12),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(7),
                peak_memory_kb: Some(18),
            },
        ]);

        let mut setup_calls = 0_u32;
        let mut teardown_calls = 0_u32;
        let report = run_closure_with_setup_teardown_with_monitor(
            spec,
            &mut monitor,
            || {
                setup_calls += 1;
                vec![1_u8, 2, 3]
            },
            |_fixture| Ok(()),
            |_fixture| {
                teardown_calls += 1;
            },
        )
        .unwrap();

        // Serialization happens after monitoring; must not affect counters.
        let _serialized = serde_json::to_string(&report).unwrap();

        assert_eq!(setup_calls, 1);
        assert_eq!(teardown_calls, 1);
        assert_eq!(monitor.started, 2);
        assert_eq!(report.cpu_total_ms(), Some(12));
        assert_eq!(report.cpu_median_ms(), Some(6));
    }

    // With a single sample, total and median both equal that sample.
    #[test]
    fn single_iteration_cpu_median_matches_the_measured_iteration() {
        let spec = BenchSpec::new("single", 1, 0).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![IterationResourceUsage {
            cpu_time_ms: Some(42),
            peak_memory_kb: Some(24),
        }]);

        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();

        assert_eq!(report.samples[0].cpu_time_ms, Some(42));
        assert_eq!(report.cpu_total_ms(), Some(42));
        assert_eq!(report.cpu_median_ms(), Some(42));
    }

    // Median of {19, 7, 11} is 11; samples are deliberately unsorted to
    // verify the median is order-independent.
    #[test]
    fn multiple_iterations_export_the_median_cpu_sample() {
        let spec = BenchSpec::new("median", 3, 0).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(19),
                peak_memory_kb: Some(10),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(7),
                peak_memory_kb: Some(30),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(11),
                peak_memory_kb: Some(20),
            },
        ]);

        let report = run_closure_with_monitor(spec, &mut monitor, || Ok(())).unwrap();

        assert_eq!(report.cpu_median_ms(), Some(11));
        assert_eq!(report.cpu_total_ms(), Some(37));
    }

    // Reported peak memory is the max across measured samples (96 here),
    // with per-sample values passed through unchanged.
    #[test]
    fn peak_memory_excludes_harness_baseline_overhead() {
        let spec = BenchSpec::new("memory", 2, 1).unwrap();
        let mut monitor = FakeResourceMonitor::new(vec![
            IterationResourceUsage {
                cpu_time_ms: Some(3),
                peak_memory_kb: Some(48),
            },
            IterationResourceUsage {
                cpu_time_ms: Some(4),
                peak_memory_kb: Some(96),
            },
        ]);

        let report = run_closure_with_setup_teardown_with_monitor(
            spec,
            &mut monitor,
            || vec![0_u8; 1024],
            |_fixture| Ok(()),
            |_fixture| {},
        )
        .unwrap();

        assert_eq!(
            report
                .samples
                .iter()
                .map(|sample| sample.peak_memory_kb)
                .collect::<Vec<_>>(),
            vec![Some(48), Some(96)]
        );
        assert_eq!(report.peak_memory_kb(), Some(96));
    }

    // Feeds the sampler a scripted sequence of memory readings; expects the
    // reported peak to be relative to a baseline rather than absolute.
    // With readings 80, 100, 140, 120 and an expected result of 40, the
    // baseline is presumably the second reading (100), peak 140 => 40 —
    // confirm against MemoryPeakSampler's baseline logic if this changes.
    #[test]
    fn memory_peak_sampler_uses_the_first_post_startup_sample_as_its_baseline() {
        use std::collections::VecDeque;
        use std::sync::{Arc, Mutex};

        let samples = Arc::new(Mutex::new(VecDeque::from([
            Some(80_u64),
            Some(100_u64),
            Some(140_u64),
            Some(120_u64),
        ])));
        let reader_samples = Arc::clone(&samples);
        let reader = Arc::new(move || {
            reader_samples
                .lock()
                .expect("sample queue")
                .pop_front()
                .unwrap_or(Some(120))
        });

        let sampler = MemoryPeakSampler::start_with_reader(reader).expect("sampler");
        let peak_kb = sampler.stop().expect("peak memory");

        assert_eq!(peak_kb, 40);
    }

    // One-time setup runs exactly once; the body runs warmup + iterations times.
    #[test]
    fn run_with_setup_calls_setup_once() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static RUN_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 5, 2).unwrap();
        let report = run_closure_with_setup(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                RUN_COUNT.fetch_add(1, Ordering::SeqCst);
                std::hint::black_box(data.len());
                Ok(())
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1); // Setup called once
        assert_eq!(RUN_COUNT.load(Ordering::SeqCst), 7); // 2 warmup + 5 iterations
        assert_eq!(report.samples.len(), 5);
    }

    // Per-iteration setup runs once per warmup AND per measured iteration.
    #[test]
    fn run_with_setup_per_iter_calls_setup_each_time() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_per_iter(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                std::hint::black_box(data);
                Ok(())
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 4); // 1 warmup + 3 iterations
        assert_eq!(report.samples.len(), 3);
    }

    // Setup and teardown each run exactly once around the whole benchmark.
    #[test]
    fn run_with_setup_teardown_calls_both() {
        use std::sync::atomic::{AtomicU32, Ordering};

        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static TEARDOWN_COUNT: AtomicU32 = AtomicU32::new(0);

        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                "resource"
            },
            |_resource| Ok(()),
            |_resource| {
                TEARDOWN_COUNT.fetch_add(1, Ordering::SeqCst);
            },
        )
        .unwrap();

        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(TEARDOWN_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(report.samples.len(), 3);
    }

    // Timeline ordering for a 2-iteration, 1-warmup run with setup/teardown:
    // setup, warmup, two measured spans, teardown — in that exact order.
    #[test]
    fn bench_report_serializes_exact_harness_timeline() {
        let spec = BenchSpec::new("timeline", 2, 1).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || {
                std::thread::sleep(Duration::from_millis(1));
                "resource"
            },
            |_resource| {
                std::thread::sleep(Duration::from_millis(1));
                Ok(())
            },
            |_resource| {
                std::thread::sleep(Duration::from_millis(1));
            },
        )
        .unwrap();

        let json = serde_json::to_value(&report).unwrap();
        assert_eq!(json["timeline"][0]["phase"], "setup");
        assert_eq!(json["timeline"][1]["phase"], "warmup-benchmark");
        assert_eq!(json["timeline"][2]["phase"], "measured-benchmark");
        assert_eq!(json["timeline"][3]["phase"], "measured-benchmark");
        assert_eq!(json["timeline"][4]["phase"], "teardown");
    }
}