use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::time::{Duration, Instant};
use thiserror::Error;
/// Configuration for a single benchmark: a display name, the number of
/// measured iterations, and the number of untimed warmup iterations.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSpec {
    pub name: String,
    pub iterations: u32,
    pub warmup: u32,
}
impl BenchSpec {
    /// Builds a spec, rejecting a zero measured-iteration count.
    ///
    /// # Errors
    /// Returns [`TimingError::NoIterations`] when `iterations` is zero.
    pub fn new(name: impl Into<String>, iterations: u32, warmup: u32) -> Result<Self, TimingError> {
        if iterations == 0 {
            Err(TimingError::NoIterations { count: iterations })
        } else {
            Ok(Self {
                name: name.into(),
                iterations,
                warmup,
            })
        }
    }
}
/// One measured iteration.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSample {
    /// Wall-clock duration of the iteration, in nanoseconds.
    pub duration_ns: u64,
}
impl BenchSample {
    /// Converts a [`Duration`] into a sample.
    ///
    /// Saturates at `u64::MAX` nanoseconds instead of silently truncating
    /// the `u128` nanosecond count (a plain `as u64` cast would wrap),
    /// matching the clamping used for semantic-phase durations.
    fn from_duration(duration: Duration) -> Self {
        Self {
            duration_ns: u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX),
        }
    }
}
/// Full result of one benchmark run: the spec it was produced from, the raw
/// per-iteration wall-clock samples, the flat semantic-phase timings, and
/// optionally process resource usage.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchReport {
    pub spec: BenchSpec,
    pub samples: Vec<BenchSample>,
    pub phases: Vec<SemanticPhase>,
    // `serde(default)` keeps deserialization working for inputs that omit
    // this field entirely (it becomes `None`).
    #[serde(default)]
    pub resource_usage: Option<BenchResourceUsage>,
}
/// Process resource usage captured during a benchmark run. Either metric may
/// be absent when the platform provides no probe for it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BenchResourceUsage {
    pub cpu_median_ms: Option<u64>,
    pub peak_memory_kb: Option<u64>,
}
impl BenchResourceUsage {
    /// Returns `true` when neither a CPU nor a memory metric was captured.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        matches!(
            (self.cpu_median_ms, self.peak_memory_kb),
            (None, None)
        )
    }
}
impl BenchReport {
    /// Sample durations in ascending order; shared by the order statistics.
    fn sorted_durations_ns(&self) -> Vec<u64> {
        let mut durations: Vec<u64> = self.samples.iter().map(|s| s.duration_ns).collect();
        durations.sort_unstable();
        durations
    }
    /// Arithmetic mean of the sample durations in ns; 0.0 for an empty report.
    #[must_use]
    pub fn mean_ns(&self) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        // Accumulate in u128 so many near-u64::MAX samples cannot overflow
        // the sum (consistent with the u128 guard in `median_u64`).
        let sum: u128 = self.samples.iter().map(|s| u128::from(s.duration_ns)).sum();
        sum as f64 / self.samples.len() as f64
    }
    /// Median sample duration in ns; 0.0 for an empty report.
    #[must_use]
    pub fn median_ns(&self) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        let sorted = self.sorted_durations_ns();
        let len = sorted.len();
        if len % 2 == 0 {
            // Average the two middle values in f64: adding them as u64 first
            // could overflow for durations near u64::MAX.
            (sorted[len / 2 - 1] as f64 + sorted[len / 2] as f64) / 2.0
        } else {
            sorted[len / 2] as f64
        }
    }
    /// Sample standard deviation (Bessel-corrected, n-1 denominator) in ns;
    /// 0.0 when fewer than two samples exist.
    #[must_use]
    pub fn std_dev_ns(&self) -> f64 {
        if self.samples.len() < 2 {
            return 0.0;
        }
        let mean = self.mean_ns();
        let variance: f64 = self
            .samples
            .iter()
            .map(|s| {
                let diff = s.duration_ns as f64 - mean;
                diff * diff
            })
            .sum::<f64>()
            / (self.samples.len() - 1) as f64;
        variance.sqrt()
    }
    /// Nearest-rank percentile in ns for `p` in [0, 100] (values outside the
    /// range are clamped); 0.0 for an empty report.
    #[must_use]
    pub fn percentile_ns(&self, p: f64) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        let sorted = self.sorted_durations_ns();
        let fraction = p.clamp(0.0, 100.0) / 100.0;
        let index = (fraction * (sorted.len() - 1) as f64).round() as usize;
        sorted[index.min(sorted.len() - 1)] as f64
    }
    /// Smallest sample in ns; 0 for an empty report.
    #[must_use]
    pub fn min_ns(&self) -> u64 {
        self.samples
            .iter()
            .map(|s| s.duration_ns)
            .min()
            .unwrap_or(0)
    }
    /// Largest sample in ns; 0 for an empty report.
    #[must_use]
    pub fn max_ns(&self) -> u64 {
        self.samples
            .iter()
            .map(|s| s.duration_ns)
            .max()
            .unwrap_or(0)
    }
    /// Aggregates the report into a flat summary. `iterations` reflects the
    /// number of samples actually collected, not the requested count.
    #[must_use]
    pub fn summary(&self) -> BenchSummary {
        BenchSummary {
            name: self.spec.name.clone(),
            iterations: self.samples.len() as u32,
            warmup: self.spec.warmup,
            mean_ns: self.mean_ns(),
            median_ns: self.median_ns(),
            std_dev_ns: self.std_dev_ns(),
            min_ns: self.min_ns(),
            max_ns: self.max_ns(),
            p95_ns: self.percentile_ns(95.0),
            p99_ns: self.percentile_ns(99.0),
        }
    }
}
/// Flat, serializable aggregate of a [`BenchReport`], produced by
/// `BenchReport::summary`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BenchSummary {
    pub name: String,
    // Number of samples actually collected (see `BenchReport::summary`).
    pub iterations: u32,
    pub warmup: u32,
    pub mean_ns: f64,
    pub median_ns: f64,
    pub std_dev_ns: f64,
    pub min_ns: u64,
    pub max_ns: u64,
    pub p95_ns: f64,
    pub p99_ns: f64,
}
/// A named, top-level timed phase recorded via `profile_phase`. Repeated
/// phases with the same name accumulate into one entry.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticPhase {
    pub name: String,
    pub duration_ns: u64,
}
/// Per-thread accumulator for named phase timings. Only top-level phases are
/// recorded; nested `profile_phase` calls keep the model flat.
#[derive(Default)]
struct SemanticPhaseCollector {
    enabled: bool,
    depth: usize,
    phases: Vec<SemanticPhase>,
}
impl SemanticPhaseCollector {
    /// Clears all recorded state and disables collection.
    fn reset(&mut self) {
        self.phases.clear();
        self.depth = 0;
        self.enabled = false;
    }
    /// Starts a fresh measurement window.
    fn begin_measurement(&mut self) {
        self.reset();
        self.enabled = true;
    }
    /// Ends the measurement window and hands back the recorded phases.
    fn finish(&mut self) -> Vec<SemanticPhase> {
        self.depth = 0;
        self.enabled = false;
        std::mem::take(&mut self.phases)
    }
    /// Registers entry into a phase. Returns `None` when collection is
    /// disabled, otherwise whether this phase is at the top nesting level.
    fn enter_phase(&mut self) -> Option<bool> {
        if !self.enabled {
            return None;
        }
        let was_top_level = self.depth == 0;
        self.depth += 1;
        Some(was_top_level)
    }
    /// Registers exit from a phase, accumulating its elapsed time when it
    /// was a top-level phase during an active measurement.
    fn exit_phase(&mut self, name: &str, top_level: bool, elapsed: Duration) {
        self.depth = self.depth.saturating_sub(1);
        if !(self.enabled && top_level) {
            return;
        }
        // Clamp the u128 nanosecond count into u64 instead of truncating.
        let duration_ns = elapsed.as_nanos().min(u128::from(u64::MAX)) as u64;
        match self.phases.iter_mut().find(|entry| entry.name == name) {
            Some(entry) => {
                entry.duration_ns = entry.duration_ns.saturating_add(duration_ns);
            }
            None => self.phases.push(SemanticPhase {
                name: name.to_string(),
                duration_ns,
            }),
        }
    }
}
/// Per-thread collector of CPU-time and peak-memory deltas for the current
/// benchmark run. All readings go through `current_process_cpu_ms` /
/// `current_process_memory_kb`, which tests can script.
#[derive(Default)]
struct ResourceUsageCollector {
    // Readings are ignored unless a measurement is active.
    enabled: bool,
    // CPU reading taken at the start of the in-flight iteration, if any.
    current_iteration_cpu_start_ms: Option<u64>,
    // Per-iteration CPU-time deltas; their median is reported.
    cpu_samples_ms: Vec<u64>,
    // Memory reading the peak delta is measured against.
    baseline_memory_kb: Option<u64>,
    // Largest observed memory delta above the baseline, in kB.
    peak_memory_kb: Option<u64>,
}
impl ResourceUsageCollector {
    /// Clears all state and disables collection.
    fn reset(&mut self) {
        self.enabled = false;
        self.current_iteration_cpu_start_ms = None;
        self.cpu_samples_ms.clear();
        self.baseline_memory_kb = None;
        self.peak_memory_kb = None;
    }
    /// Starts a fresh measurement and records the initial memory baseline.
    fn begin_measurement(&mut self) {
        self.reset();
        self.enabled = true;
        self.refresh_baseline();
    }
    /// Re-reads the memory baseline (used after per-iteration setup) so that
    /// setup allocations are excluded from the reported peak.
    fn refresh_baseline(&mut self) {
        if !self.enabled {
            return;
        }
        self.current_iteration_cpu_start_ms = None;
        self.baseline_memory_kb = current_process_memory_kb();
        if self.baseline_memory_kb.is_some() {
            // Guarantee a peak of 0 is reported even if no later sample
            // ever exceeds the baseline.
            self.peak_memory_kb.get_or_insert(0);
        }
    }
    /// Marks the start of a measured iteration by recording CPU time.
    fn begin_iteration(&mut self) {
        if !self.enabled {
            return;
        }
        self.current_iteration_cpu_start_ms = current_process_cpu_ms();
    }
    /// Takes a memory reading and raises the recorded peak when the delta
    /// above the baseline exceeds the previous maximum.
    fn sample(&mut self) {
        if !self.enabled {
            return;
        }
        let Some(baseline_memory_kb) = self.baseline_memory_kb else {
            return;
        };
        let Some(current_memory_kb) = current_process_memory_kb() else {
            return;
        };
        // Memory below the baseline counts as a delta of zero.
        let current_peak_kb = current_memory_kb.saturating_sub(baseline_memory_kb);
        match self.peak_memory_kb {
            Some(existing_peak_kb) if existing_peak_kb >= current_peak_kb => {}
            _ => self.peak_memory_kb = Some(current_peak_kb),
        }
    }
    /// Closes a measured iteration, recording its CPU-time delta when both
    /// endpoint readings were available and non-decreasing.
    fn end_iteration(&mut self) {
        if !self.enabled {
            return;
        }
        let Some(start_cpu_ms) = self.current_iteration_cpu_start_ms.take() else {
            return;
        };
        let Some(end_cpu_ms) = current_process_cpu_ms() else {
            return;
        };
        if end_cpu_ms < start_cpu_ms {
            return;
        }
        self.cpu_samples_ms.push(end_cpu_ms - start_cpu_ms);
    }
    /// Ends the measurement and returns the collected usage, or `None` when
    /// nothing meaningful was captured. Always clears internal state.
    fn finish(&mut self) -> Option<BenchResourceUsage> {
        self.enabled = false;
        // An all-zero CPU sample set carries no signal (timer resolution too
        // coarse for the workload), so no CPU median is reported then.
        let cpu_median_ms = if self.cpu_samples_ms.iter().any(|sample_ms| *sample_ms > 0) {
            median_u64(&self.cpu_samples_ms)
        } else {
            None
        };
        let resource_usage = Some(BenchResourceUsage {
            cpu_median_ms,
            peak_memory_kb: self.peak_memory_kb,
        });
        self.current_iteration_cpu_start_ms = None;
        self.cpu_samples_ms.clear();
        self.baseline_memory_kb = None;
        self.peak_memory_kb = None;
        resource_usage.filter(|usage| !usage.is_empty())
    }
}
thread_local! {
    // Collectors are per-thread so benchmarks running on different threads
    // cannot interfere with each other's phase or resource bookkeeping.
    static SEMANTIC_PHASE_COLLECTOR: RefCell<SemanticPhaseCollector> =
        RefCell::new(SemanticPhaseCollector::default());
    static RESOURCE_USAGE_COLLECTOR: RefCell<ResourceUsageCollector> =
        RefCell::new(ResourceUsageCollector::default());
}
/// Drop guard created by `profile_phase`; when dropped it reports the phase's
/// elapsed time back to the thread-local collector. A guard with
/// `started_at == None` is inert (collection was disabled at entry).
struct SemanticPhaseGuard {
    name: String,
    started_at: Option<Instant>,
    top_level: bool,
}
impl Drop for SemanticPhaseGuard {
    fn drop(&mut self) {
        if let Some(started_at) = self.started_at {
            let elapsed = started_at.elapsed();
            SEMANTIC_PHASE_COLLECTOR.with(|cell| {
                cell.borrow_mut()
                    .exit_phase(&self.name, self.top_level, elapsed);
            });
        }
    }
}
/// Clears any semantic-phase state on this thread and disables collection.
fn reset_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|cell| cell.borrow_mut().reset());
}
/// Arms semantic-phase collection for a new measurement on this thread.
fn begin_semantic_phase_collection() {
    SEMANTIC_PHASE_COLLECTOR.with(|cell| cell.borrow_mut().begin_measurement());
}
/// Disarms semantic-phase collection and hands back the recorded phases.
fn finish_semantic_phase_collection() -> Vec<SemanticPhase> {
    SEMANTIC_PHASE_COLLECTOR.with(|cell| cell.borrow_mut().finish())
}
/// Clears any resource-usage state on this thread and disables collection.
fn reset_resource_usage_collection() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().reset());
}
/// Arms resource-usage collection and records the initial memory baseline.
fn begin_resource_usage_collection() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().begin_measurement());
}
/// Re-reads the memory baseline (used after per-iteration setup).
fn refresh_resource_usage_baseline() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().refresh_baseline());
}
/// Records the CPU reading at the start of a measured iteration.
fn begin_resource_usage_iteration() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().begin_iteration());
}
/// Takes a memory sample, possibly raising the recorded peak.
fn sample_resource_usage() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().sample());
}
/// Records the CPU-time delta for the iteration started last.
fn end_resource_usage_iteration() {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().end_iteration());
}
/// Disarms resource-usage collection and returns whatever was captured.
fn finish_resource_usage_collection() -> Option<BenchResourceUsage> {
    RESOURCE_USAGE_COLLECTOR.with(|cell| cell.borrow_mut().finish())
}
/// Median of a slice of `u64`s, or `None` when the slice is empty.
///
/// For an even count the two middle values are averaged in `u128` so the
/// intermediate sum cannot overflow.
fn median_u64(values: &[u64]) -> Option<u64> {
    let mut sorted = values.to_vec();
    sorted.sort_unstable();
    match sorted.len() {
        0 => None,
        n if n % 2 == 1 => Some(sorted[n / 2]),
        n => {
            let low = u128::from(sorted[n / 2 - 1]);
            let high = u128::from(sorted[n / 2]);
            Some(((low + high) / 2) as u64)
        }
    }
}
/// Runs `f` as a named semantic phase, timing it when a measurement is
/// active on this thread. Only top-level phases are recorded; nested calls
/// execute normally but add no entry. Memory is sampled at both boundaries.
pub fn profile_phase<T>(name: &str, f: impl FnOnce() -> T) -> T {
    sample_resource_usage();
    let guard = SEMANTIC_PHASE_COLLECTOR.with(|cell| {
        cell.borrow_mut().enter_phase().map_or_else(
            // Collection disabled (e.g. during warmup): inert guard.
            || SemanticPhaseGuard {
                name: String::new(),
                started_at: None,
                top_level: false,
            },
            |top_level| SemanticPhaseGuard {
                name: name.to_string(),
                started_at: Some(Instant::now()),
                top_level,
            },
        )
    });
    let result = f();
    // Dropping the guard reports the elapsed time to the collector.
    drop(guard);
    sample_resource_usage();
    result
}
/// Errors produced by benchmark construction and execution.
#[derive(Debug, Error)]
pub enum TimingError {
    /// The spec requested zero measured iterations.
    #[error("iterations must be greater than zero (got {count}). Minimum recommended: 10")]
    NoIterations {
        count: u32,
    },
    /// The benchmarked closure itself reported a failure.
    #[error("benchmark function failed: {0}")]
    Execution(String),
}
pub fn run_closure<F>(spec: BenchSpec, mut f: F) -> Result<BenchReport, TimingError>
where
F: FnMut() -> Result<(), TimingError>,
{
if spec.iterations == 0 {
return Err(TimingError::NoIterations {
count: spec.iterations,
});
}
reset_semantic_phase_collection();
reset_resource_usage_collection();
for _ in 0..spec.warmup {
f()?;
}
begin_semantic_phase_collection();
begin_resource_usage_collection();
let mut samples = Vec::with_capacity(spec.iterations as usize);
for _ in 0..spec.iterations {
begin_resource_usage_iteration();
sample_resource_usage();
let start = Instant::now();
if let Err(err) = f() {
let _ = finish_semantic_phase_collection();
let _ = finish_resource_usage_collection();
return Err(err);
}
sample_resource_usage();
end_resource_usage_iteration();
samples.push(BenchSample::from_duration(start.elapsed()));
}
let phases = finish_semantic_phase_collection();
let resource_usage = finish_resource_usage_collection();
Ok(BenchReport {
spec,
samples,
phases,
resource_usage,
})
}
/// Like `run_closure`, but runs `setup` once before any iteration and passes
/// the produced value to every warmup and measured invocation by reference.
/// Setup runs before collection begins, so its cost is never measured.
///
/// # Errors
/// Returns [`TimingError::NoIterations`] when `spec.iterations` is zero, or
/// the first error produced by `f`.
pub fn run_closure_with_setup<S, T, F>(
    spec: BenchSpec,
    setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnOnce() -> T,
    F: FnMut(&T) -> Result<(), TimingError>,
{
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }
    reset_semantic_phase_collection();
    reset_resource_usage_collection();
    let input = setup();
    // Warmup runs with collection disabled; phases recorded here are dropped.
    for _ in 0..spec.warmup {
        f(&input)?;
    }
    begin_semantic_phase_collection();
    begin_resource_usage_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for _ in 0..spec.iterations {
        begin_resource_usage_iteration();
        sample_resource_usage();
        let start = Instant::now();
        if let Err(err) = f(&input) {
            // Disarm and discard collector state before propagating.
            let _ = finish_semantic_phase_collection();
            let _ = finish_resource_usage_collection();
            return Err(err);
        }
        sample_resource_usage();
        end_resource_usage_iteration();
        samples.push(BenchSample::from_duration(start.elapsed()));
    }
    let phases = finish_semantic_phase_collection();
    let resource_usage = finish_resource_usage_collection();
    Ok(BenchReport {
        spec,
        samples,
        phases,
        resource_usage,
    })
}
/// Like `run_closure`, but calls `setup` before every warmup and measured
/// iteration and passes the produced value to `f` by move, for workloads
/// that consume their input. The memory baseline is refreshed after each
/// setup so per-iteration setup allocations are excluded from the peak.
///
/// # Errors
/// Returns [`TimingError::NoIterations`] when `spec.iterations` is zero, or
/// the first error produced by `f`.
pub fn run_closure_with_setup_per_iter<S, T, F>(
    spec: BenchSpec,
    mut setup: S,
    mut f: F,
) -> Result<BenchReport, TimingError>
where
    S: FnMut() -> T,
    F: FnMut(T) -> Result<(), TimingError>,
{
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }
    reset_semantic_phase_collection();
    reset_resource_usage_collection();
    for _ in 0..spec.warmup {
        let input = setup();
        f(input)?;
    }
    begin_semantic_phase_collection();
    begin_resource_usage_collection();
    let mut samples = Vec::with_capacity(spec.iterations as usize);
    for _ in 0..spec.iterations {
        let input = setup();
        // Re-baseline after setup: only allocations made by `f` itself
        // should count toward the reported peak. This also clears any
        // pending CPU start reading, so setup CPU time is excluded too.
        refresh_resource_usage_baseline();
        begin_resource_usage_iteration();
        sample_resource_usage();
        let start = Instant::now();
        if let Err(err) = f(input) {
            let _ = finish_semantic_phase_collection();
            let _ = finish_resource_usage_collection();
            return Err(err);
        }
        sample_resource_usage();
        end_resource_usage_iteration();
        samples.push(BenchSample::from_duration(start.elapsed()));
    }
    let phases = finish_semantic_phase_collection();
    let resource_usage = finish_resource_usage_collection();
    Ok(BenchReport {
        spec,
        samples,
        phases,
        resource_usage,
    })
}
/// Like [`run_closure_with_setup`], but additionally passes the setup value
/// to `teardown` once the run ends. `teardown` now runs on *every* exit path
/// after `setup` succeeded — previously an error from `f` returned early and
/// the setup resource was never torn down.
///
/// # Errors
/// Returns [`TimingError::NoIterations`] when `spec.iterations` is zero, or
/// the first error produced by `f` (after `teardown` has run).
pub fn run_closure_with_setup_teardown<S, T, F, D>(
    spec: BenchSpec,
    setup: S,
    mut f: F,
    teardown: D,
) -> Result<BenchReport, TimingError>
where
    S: FnOnce() -> T,
    F: FnMut(&T) -> Result<(), TimingError>,
    D: FnOnce(T),
{
    if spec.iterations == 0 {
        return Err(TimingError::NoIterations {
            count: spec.iterations,
        });
    }
    reset_semantic_phase_collection();
    reset_resource_usage_collection();
    let input = setup();
    // Run warmup + measurement inside an immediately-invoked closure so that
    // `teardown(input)` always executes, including when `f` fails part-way.
    let outcome = (|| {
        for _ in 0..spec.warmup {
            f(&input)?;
        }
        begin_semantic_phase_collection();
        begin_resource_usage_collection();
        let mut samples = Vec::with_capacity(spec.iterations as usize);
        for _ in 0..spec.iterations {
            begin_resource_usage_iteration();
            sample_resource_usage();
            let start = Instant::now();
            if let Err(err) = f(&input) {
                // Disarm and discard collector state before propagating.
                let _ = finish_semantic_phase_collection();
                let _ = finish_resource_usage_collection();
                return Err(err);
            }
            sample_resource_usage();
            end_resource_usage_iteration();
            samples.push(BenchSample::from_duration(start.elapsed()));
        }
        let phases = finish_semantic_phase_collection();
        let resource_usage = finish_resource_usage_collection();
        Ok((samples, phases, resource_usage))
    })();
    teardown(input);
    let (samples, phases, resource_usage) = outcome?;
    Ok(BenchReport {
        spec,
        samples,
        phases,
        resource_usage,
    })
}
/// Physical memory footprint of the current process in kB on macOS/iOS,
/// read via libproc's `proc_pid_rusage`. Returns `None` when the call fails.
#[cfg(any(target_os = "ios", target_os = "macos"))]
fn platform_current_process_memory_kb() -> Option<u64> {
    unsafe extern "C" {
        fn proc_pid_rusage(
            pid: libc::c_int,
            flavor: libc::c_int,
            buffer: *mut libc::c_void,
        ) -> libc::c_int;
    }
    let mut info = std::mem::MaybeUninit::<libc::rusage_info_v4>::zeroed();
    // SAFETY: the buffer is a zeroed `rusage_info_v4`, which matches the
    // RUSAGE_INFO_V4 flavor requested; the kernel writes into it.
    let status = unsafe {
        proc_pid_rusage(
            libc::getpid(),
            libc::RUSAGE_INFO_V4,
            info.as_mut_ptr().cast(),
        )
    };
    if status != 0 {
        return None;
    }
    // SAFETY: a zero status indicates the struct was fully initialized.
    let info = unsafe { info.assume_init() };
    Some(info.ri_phys_footprint / 1024)
}
/// Resident memory of the current process in kB on Android, parsed from the
/// `VmRSS:` line of `/proc/self/status`. `None` on read or parse failure.
#[cfg(target_os = "android")]
fn platform_current_process_memory_kb() -> Option<u64> {
    std::fs::read_to_string("/proc/self/status")
        .ok()
        .and_then(|status| parse_proc_status_memory_kb(&status))
}
/// Extracts the `VmRSS:` value (in kB) from a `/proc/<pid>/status` dump.
/// Also compiled for tests so the parser is unit-testable off-Android.
#[cfg(any(test, target_os = "android"))]
fn parse_proc_status_memory_kb(status: &str) -> Option<u64> {
    status
        .lines()
        .filter_map(|line| line.strip_prefix("VmRSS:"))
        .find_map(|rest| rest.split_whitespace().next()?.parse::<u64>().ok())
}
/// Fallback for platforms without a memory probe: reports unavailable.
#[cfg(not(any(target_os = "ios", target_os = "macos", target_os = "android")))]
fn platform_current_process_memory_kb() -> Option<u64> {
    None
}
/// Cumulative user+system CPU time of this process in milliseconds on unix,
/// read via `getrusage(RUSAGE_SELF)`. `None` when the call fails or a
/// timeval field is negative.
#[cfg(unix)]
fn platform_current_process_cpu_ms() -> Option<u64> {
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::zeroed();
    // SAFETY: getrusage writes a complete `rusage` struct into the buffer.
    let status = unsafe {
        libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr())
    };
    if status != 0 {
        return None;
    }
    // SAFETY: a zero status indicates the struct was initialized.
    let usage = unsafe { usage.assume_init() };
    let user_ms = timeval_to_ms(usage.ru_utime)?;
    let system_ms = timeval_to_ms(usage.ru_stime)?;
    Some(user_ms.saturating_add(system_ms))
}
/// Fallback for non-unix targets: CPU time is unavailable.
#[cfg(not(unix))]
fn platform_current_process_cpu_ms() -> Option<u64> {
    None
}
/// Converts a `timeval` to whole milliseconds, rejecting negative fields.
///
/// Uses saturating arithmetic for both the multiply and the final add so
/// pathological values cannot overflow (the original saturated the multiply
/// but the subsequent `+` could still overflow and panic in debug builds).
#[cfg(unix)]
fn timeval_to_ms(value: libc::timeval) -> Option<u64> {
    if value.tv_sec < 0 || value.tv_usec < 0 {
        return None;
    }
    let seconds_ms = (value.tv_sec as u64).saturating_mul(1000);
    let micros_ms = (value.tv_usec as u64) / 1000;
    Some(seconds_ms.saturating_add(micros_ms))
}
// Scripted readings consumed by `current_process_memory_kb` /
// `current_process_cpu_ms` during tests. An outer `None` means "no script
// installed — use the real platform probe"; a queued inner `None` simulates
// a probe failure.
#[cfg(test)]
thread_local! {
    static TEST_MEMORY_SAMPLES_KB: RefCell<Option<std::collections::VecDeque<Option<u64>>>> =
        const { RefCell::new(None) };
    static TEST_CPU_SAMPLES_MS: RefCell<Option<std::collections::VecDeque<Option<u64>>>> =
        const { RefCell::new(None) };
}
/// Pops the next scripted memory reading, if a script is installed.
#[cfg(test)]
fn take_test_memory_sample_kb() -> Option<Option<u64>> {
    TEST_MEMORY_SAMPLES_KB.with(|queue| {
        queue.borrow_mut().as_mut().and_then(|q| q.pop_front())
    })
}
/// Pops the next scripted CPU reading, if a script is installed.
#[cfg(test)]
fn take_test_cpu_sample_ms() -> Option<Option<u64>> {
    TEST_CPU_SAMPLES_MS.with(|queue| {
        queue.borrow_mut().as_mut().and_then(|q| q.pop_front())
    })
}
/// Installs a scripted sequence of memory readings for tests.
#[cfg(test)]
fn set_test_memory_samples_kb<I>(samples: I)
where
    I: IntoIterator<Item = Option<u64>>,
{
    let queue: std::collections::VecDeque<Option<u64>> = samples.into_iter().collect();
    TEST_MEMORY_SAMPLES_KB.with(|slot| *slot.borrow_mut() = Some(queue));
}
/// Installs a scripted sequence of CPU readings for tests.
#[cfg(test)]
fn set_test_cpu_samples_ms<I>(samples: I)
where
    I: IntoIterator<Item = Option<u64>>,
{
    let queue: std::collections::VecDeque<Option<u64>> = samples.into_iter().collect();
    TEST_CPU_SAMPLES_MS.with(|slot| *slot.borrow_mut() = Some(queue));
}
/// Removes any scripted memory readings.
#[cfg(test)]
fn clear_test_memory_samples_kb() {
    TEST_MEMORY_SAMPLES_KB.with(|slot| *slot.borrow_mut() = None);
}
/// Removes any scripted CPU readings.
#[cfg(test)]
fn clear_test_cpu_samples_ms() {
    TEST_CPU_SAMPLES_MS.with(|slot| *slot.borrow_mut() = None);
}
/// Current process resident memory in kB, or `None` when unavailable.
/// In test builds, installed scripted samples take precedence over the
/// platform probe.
fn current_process_memory_kb() -> Option<u64> {
    #[cfg(test)]
    if let Some(sample) = take_test_memory_sample_kb() {
        return sample;
    }
    platform_current_process_memory_kb()
}
/// Cumulative CPU time of this process in ms, or `None` when unavailable.
/// In test builds, installed scripted samples take precedence over the
/// platform probe.
fn current_process_cpu_ms() -> Option<u64> {
    #[cfg(test)]
    if let Some(sample) = take_test_cpu_sample_ms() {
        return sample;
    }
    platform_current_process_cpu_ms()
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Drop guard that clears scripted samples and collector state at the
    /// end of a test (even on panic) so tests cannot leak state into each
    /// other on the same thread.
    struct TestMemorySamplesGuard;
    impl Drop for TestMemorySamplesGuard {
        fn drop(&mut self) {
            clear_test_memory_samples_kb();
            clear_test_cpu_samples_ms();
            reset_resource_usage_collection();
        }
    }
    #[test]
    fn runs_benchmark_collects_requested_samples() {
        let spec = BenchSpec::new("noop", 3, 1).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(report.samples.len(), 3);
        assert_eq!(report.spec.name, "noop");
        assert_eq!(report.spec.iterations, 3);
    }
    #[test]
    fn rejects_zero_iterations() {
        let result = BenchSpec::new("test", 0, 10);
        assert!(matches!(
            result,
            Err(TimingError::NoIterations { count: 0 })
        ));
    }
    #[test]
    fn allows_zero_warmup() {
        let spec = BenchSpec::new("test", 5, 0).unwrap();
        assert_eq!(spec.warmup, 0);
        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(report.samples.len(), 5);
    }
    #[test]
    fn serializes_to_json() {
        let spec = BenchSpec::new("test", 10, 2).unwrap();
        let report = run_closure(spec, || {
            profile_phase("prove", || std::thread::sleep(Duration::from_millis(1)));
            Ok(())
        })
        .unwrap();
        let json = serde_json::to_string(&report).unwrap();
        let restored: BenchReport = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.spec.name, "test");
        assert_eq!(restored.samples.len(), 10);
        assert_eq!(restored.phases.len(), 1);
        assert_eq!(restored.phases[0].name, "prove");
        assert!(restored.phases[0].duration_ns > 0);
    }
    #[test]
    fn measured_peak_memory_uses_iteration_baseline_only() {
        let _guard = TestMemorySamplesGuard;
        // Baseline reading 100, then two readings per iteration;
        // expected peak = 130 - 100 = 30 kB.
        set_test_memory_samples_kb([
            Some(100),
            Some(104),
            Some(120),
            Some(112),
            Some(130),
        ]);
        let spec = BenchSpec::new("mem", 2, 1).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: None,
                peak_memory_kb: Some(30),
            })
        );
    }
    #[test]
    fn measured_peak_memory_excludes_one_time_setup_and_teardown() {
        let _guard = TestMemorySamplesGuard;
        set_test_memory_samples_kb([Some(220), Some(225), Some(250)]);
        let spec = BenchSpec::new("mem-setup", 1, 0).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || vec![0u8; 1024],
            |_buffer| Ok(()),
            |_buffer| {},
        )
        .unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: None,
                peak_memory_kb: Some(30),
            })
        );
    }
    #[test]
    fn measured_peak_memory_excludes_per_iteration_setup() {
        let _guard = TestMemorySamplesGuard;
        // One baseline refresh per iteration; in-iteration deltas are
        // 0/10 (vs. 150) and 0/20 (vs. 170), so the peak is 20 kB.
        set_test_memory_samples_kb([
            Some(100),
            Some(150),
            Some(150),
            Some(160),
            Some(170),
            Some(170),
            Some(190),
        ]);
        let spec = BenchSpec::new("mem-setup-per-iter", 2, 0).unwrap();
        let report = run_closure_with_setup_per_iter(spec, || vec![0u8; 1024], |_buffer| Ok(()))
            .unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: None,
                peak_memory_kb: Some(20),
            })
        );
    }
    #[test]
    fn measured_peak_memory_preserves_zero_delta() {
        let _guard = TestMemorySamplesGuard;
        set_test_memory_samples_kb([Some(100), Some(100), Some(100)]);
        let spec = BenchSpec::new("mem-zero", 1, 0).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: None,
                peak_memory_kb: Some(0),
            })
        );
    }
    #[test]
    fn measured_cpu_median_uses_per_iteration_deltas_only() {
        let _guard = TestMemorySamplesGuard;
        set_test_memory_samples_kb([Some(100); 7]);
        // Per-iteration CPU deltas: 6, 12, 19 ms -> median 12 ms.
        set_test_cpu_samples_ms([
            Some(100),
            Some(106),
            Some(130),
            Some(142),
            Some(200),
            Some(219),
        ]);
        let spec = BenchSpec::new("cpu", 3, 1).unwrap();
        let report = run_closure(spec, || Ok(())).unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: Some(12),
                peak_memory_kb: Some(0),
            })
        );
    }
    #[test]
    fn measured_cpu_median_excludes_per_iteration_setup() {
        let _guard = TestMemorySamplesGuard;
        set_test_memory_samples_kb([Some(100); 10]);
        // Per-iteration CPU deltas: 8, 11, 6 ms -> median 8 ms; CPU burnt
        // between iterations (setup) is not counted.
        set_test_cpu_samples_ms([
            Some(150),
            Some(158),
            Some(210),
            Some(221),
            Some(280),
            Some(286),
        ]);
        let spec = BenchSpec::new("cpu-setup-per-iter", 3, 0).unwrap();
        let report = run_closure_with_setup_per_iter(spec, || vec![0u8; 1024], |_buffer| Ok(()))
            .unwrap();
        assert_eq!(
            report.resource_usage,
            Some(BenchResourceUsage {
                cpu_median_ms: Some(8),
                peak_memory_kb: Some(0),
            })
        );
    }
    #[test]
    fn parse_proc_status_memory_kb_reads_vm_rss() {
        // String continuations (`\` at end of line) skip the newline and any
        // leading whitespace, so this stays a /proc-style one-line-per-field
        // dump.
        let status = "\
            Name:\ttest\n\
            VmPeak:\t 90304 kB\n\
            VmRSS:\t 21537 kB\n\
            RssAnon:\t 10144 kB\n";
        assert_eq!(parse_proc_status_memory_kb(status), Some(21_537));
    }
    #[test]
    fn parse_proc_status_memory_kb_returns_none_without_vm_rss() {
        let status = "\
            Name:\ttest\n\
            VmPeak:\t 90304 kB\n\
            VmHWM:\t 24000 kB\n";
        assert_eq!(parse_proc_status_memory_kb(status), None);
    }
    #[test]
    fn profile_phase_records_only_measured_iterations() {
        let spec = BenchSpec::new("semantic", 2, 1).unwrap();
        let mut call_index = 0u32;
        let report = run_closure(spec, || {
            // The first call is the warmup; its phase must not be recorded.
            let phase_name = if call_index == 0 {
                "warmup-only"
            } else {
                "prove"
            };
            call_index += 1;
            profile_phase(phase_name, || std::thread::sleep(Duration::from_millis(1)));
            Ok(())
        })
        .unwrap();
        assert!(
            !report
                .phases
                .iter()
                .any(|phase| phase.name == "warmup-only"),
            "warmup phases should not be recorded"
        );
        let prove = report
            .phases
            .iter()
            .find(|phase| phase.name == "prove")
            .expect("prove phase");
        assert!(prove.duration_ns > 0);
    }
    #[test]
    fn profile_phase_keeps_the_v1_model_flat() {
        let spec = BenchSpec::new("semantic-flat", 1, 0).unwrap();
        let report = run_closure(spec, || {
            profile_phase("prove", || {
                std::thread::sleep(Duration::from_millis(1));
                profile_phase("inner", || std::thread::sleep(Duration::from_millis(1)));
            });
            Ok(())
        })
        .unwrap();
        assert!(report.phases.iter().any(|phase| phase.name == "prove"));
        assert!(
            !report.phases.iter().any(|phase| phase.name == "inner"),
            "nested phases should not create a second flat phase entry"
        );
    }
    #[test]
    fn run_with_setup_calls_setup_once() {
        use std::sync::atomic::{AtomicU32, Ordering};
        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static RUN_COUNT: AtomicU32 = AtomicU32::new(0);
        let spec = BenchSpec::new("test", 5, 2).unwrap();
        let report = run_closure_with_setup(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                RUN_COUNT.fetch_add(1, Ordering::SeqCst);
                std::hint::black_box(data.len());
                Ok(())
            },
        )
        .unwrap();
        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
        // 2 warmup + 5 measured invocations.
        assert_eq!(RUN_COUNT.load(Ordering::SeqCst), 7);
        assert_eq!(report.samples.len(), 5);
    }
    #[test]
    fn run_with_setup_per_iter_calls_setup_each_time() {
        use std::sync::atomic::{AtomicU32, Ordering};
        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_per_iter(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                vec![1, 2, 3]
            },
            |data| {
                std::hint::black_box(data);
                Ok(())
            },
        )
        .unwrap();
        // 1 warmup + 3 measured iterations, each with its own setup.
        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 4);
        assert_eq!(report.samples.len(), 3);
    }
    #[test]
    fn run_with_setup_teardown_calls_both() {
        use std::sync::atomic::{AtomicU32, Ordering};
        static SETUP_COUNT: AtomicU32 = AtomicU32::new(0);
        static TEARDOWN_COUNT: AtomicU32 = AtomicU32::new(0);
        let spec = BenchSpec::new("test", 3, 1).unwrap();
        let report = run_closure_with_setup_teardown(
            spec,
            || {
                SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
                "resource"
            },
            |_resource| Ok(()),
            |_resource| {
                TEARDOWN_COUNT.fetch_add(1, Ordering::SeqCst);
            },
        )
        .unwrap();
        assert_eq!(SETUP_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(TEARDOWN_COUNT.load(Ordering::SeqCst), 1);
        assert_eq!(report.samples.len(), 3);
    }
}