codspeed-criterion-compat-walltime 4.7.0

use codspeed::instrument_hooks::InstrumentHooks;

use crate::benchmark::BenchmarkConfig;
use crate::codspeed_iter_manual::ManualMeasurement;
use crate::connection::OutgoingMessage;
use crate::measurement::Measurement;
use crate::report::{BenchmarkId, Report, ReportContext};
use crate::{black_box, ActualSamplingMode, Bencher, Criterion};
use std::marker::PhantomData;
use std::time::Duration;

/// Crate-internal abstraction over a benchmarkable routine.
///
/// Implementors provide the primitive operations (`bench`, `warm_up`,
/// `take_codspeed_manual`); the provided methods (`test`, `profile`, `sample`)
/// build the higher-level benchmark modes on top of them.
pub(crate) trait Routine<M: Measurement, T: ?Sized> {
    /// Runs the routine once per entry of `iters`, passing each entry as the
    /// iteration count, and returns one measured value per entry in the same
    /// order.
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64>;

    /// Runs the routine repeatedly to warm it up for roughly `how_long`.
    ///
    /// Returns `(elapsed_nanoseconds, iterations_executed)`.
    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64);

    /// CodSpeed addition: returns the manual-mode measurement captured by the
    /// routine, if the user called `b.iter_manual_unstable*`. Drained on read
    /// so `sample` can take ownership.
    fn take_codspeed_manual(&mut self) -> Option<ManualMeasurement>;

    /// Runs the routine with a single iteration and discards the measurement.
    fn test(&mut self, m: &M, parameter: &T) {
        self.bench(m, &[1u64], parameter);
    }

    /// Iterates the benchmarked function for a fixed length of time, but takes no measurements.
    /// This keeps the overall benchmark suite runtime constant-ish even when running under a
    /// profiler with an unknown amount of overhead. Since no measurements are taken, it also
    /// reduces the amount of time the execution spends in Criterion.rs code, which should help
    /// show the performance of the benchmarked code more clearly as well.
    ///
    /// The profiler registered on `criterion` is started before the first
    /// iteration and stopped after the last one; `time` is the total target
    /// duration, of which up to one second is spent in `warm_up`.
    fn profile(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        time: Duration,
        parameter: &T,
    ) {
        criterion
            .report
            .profile(id, report_context, time.as_nanos() as f64);

        let mut profile_path = report_context.output_directory.clone();
        if (*crate::CARGO_CRITERION_CONNECTION).is_some() {
            // If connected to cargo-criterion, generate a cargo-criterion-style path.
            // This is kind of a hack.
            profile_path.push("profile");
            profile_path.push(id.as_directory_name());
        } else {
            profile_path.push(id.as_directory_name());
            profile_path.push("profile");
        }
        criterion
            .profiler
            .borrow_mut()
            .start_profiling(id.id(), &profile_path);

        let time = time.as_nanos() as u64;

        // TODO: Some profilers will show the two batches of iterations as
        // being different code-paths even though they aren't really.

        // Get the warmup time for one second
        let (wu_elapsed, wu_iters) = self.warm_up(measurement, Duration::from_secs(1), parameter);
        if wu_elapsed < time {
            // Initial guess for the mean execution time
            let met = wu_elapsed as f64 / wu_iters as f64;

            // Guess how many iterations will be required for the remaining time
            let remaining = (time - wu_elapsed) as f64;

            let iters = remaining / met;
            let iters = iters as u64;

            self.bench(measurement, &[iters], parameter);
        }

        criterion
            .profiler
            .borrow_mut()
            .stop_profiling(id.id(), &profile_path);

        criterion.report.terminated(id, report_context);
    }

    /// Collects the timing samples for one benchmark.
    ///
    /// Returns `(sampling_mode, iteration_counts, measured_values)`, where the
    /// two boxed slices are parallel: entry `i` of the second is the value
    /// measured for the iteration count in entry `i` of the first.
    ///
    /// Three paths exist:
    /// * `config.quick_mode`: doubles the iteration count until two consecutive
    ///   samples agree closely enough or `config.measurement_time` is exceeded.
    /// * manual mode (CodSpeed addition): the user's closure already produced
    ///   the samples during `warm_up`; they are returned as-is.
    /// * otherwise: warm up, pick a sampling mode, and measure
    ///   `config.sample_size` samples.
    fn sample(
        &mut self,
        measurement: &M,
        id: &BenchmarkId,
        config: &BenchmarkConfig,
        criterion: &Criterion<M>,
        report_context: &ReportContext,
        parameter: &T,
    ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) {
        if config.quick_mode {
            let minimum_bench_duration = Duration::from_millis(100);
            let maximum_bench_duration = config.measurement_time; // default: 5 seconds
            let target_rel_stdev = config.significance_level; // default: 5%, 0.05

            use std::time::Instant;
            let time_start = Instant::now();

            let sq = |val| val * val;
            let mut n = 1;
            let mut t_prev = *self.bench(measurement, &[n], parameter).first().unwrap();

            // Early exit for extremely long running benchmarks:
            if time_start.elapsed() > maximum_bench_duration {
                let iters = vec![n as f64, n as f64].into_boxed_slice();
                // prevent gnuplot bug when all values are equal
                let elapsed = vec![t_prev, t_prev + 0.000001].into_boxed_slice();
                return (ActualSamplingMode::Flat, iters, elapsed);
            }

            // Main data collection loop.
            loop {
                let t_now = *self
                    .bench(measurement, &[n * 2], parameter)
                    .first()
                    .unwrap();
                // Least-squares fit of a line through the origin to the two
                // points (n, t_prev) and (2n, t_now), evaluated at `n`.
                let t = (t_prev + 2. * t_now) / 5.;
                // Root of the summed squared residuals of that fit.
                let stdev = (sq(t_prev - t) + sq(t_now - 2. * t)).sqrt();
                let elapsed = time_start.elapsed();
                // Stop once the fit is tight enough (and we ran long enough to
                // trust it), or once the time budget is exhausted.
                if (stdev < target_rel_stdev * t && elapsed > minimum_bench_duration)
                    || elapsed > maximum_bench_duration
                {
                    let iters = vec![n as f64, (n * 2) as f64].into_boxed_slice();
                    let elapsed = vec![t_prev, t_now].into_boxed_slice();
                    return (ActualSamplingMode::Linear, iters, elapsed);
                }
                n *= 2;
                t_prev = t_now;
            }
        }
        let wu = config.warm_up_time;
        let m_ns = config.measurement_time.as_nanos();

        // CodSpeed addition: criterion would normally announce its warmup
        // window here, but for `iter_manual_unstable*` the user controls warmup
        // entirely and criterion's `warm_up_time` is meaningless. We delay the
        // banner until after `warm_up()` returns so we can suppress it for
        // manual benches.
        let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu, parameter);

        // CodSpeed addition: if the user called `b.iter_manual_unstable*`, the first
        // closure invocation in `warm_up` already drove the entire benchmark
        // (its own warmup + measurement rounds). Skip the adaptive sampler and
        // return the captured per-round samples directly.
        if let Some(manual) = self.take_codspeed_manual() {
            let sample_count = manual.samples.len();
            let n = sample_count as u64;
            // Both factors are user-controlled; saturate instead of overflowing
            // (which would panic in debug builds and wrap in release builds),
            // matching the saturating total computed on the adaptive path.
            let total_iters = n.saturating_mul(manual.iterations);
            // Every manual round ran the same number of iterations.
            let iters: Vec<f64> = vec![manual.iterations as f64; sample_count];
            criterion.report.measurement_start(
                id,
                report_context,
                n,
                wu_elapsed as f64,
                total_iters,
            );
            if let Some(conn) = &criterion.connection {
                conn.send(&OutgoingMessage::MeasurementStart {
                    id: id.into(),
                    sample_count: n,
                    estimate_ns: wu_elapsed as f64,
                    iter_count: total_iters,
                })
                .unwrap();
            }
            return (
                ActualSamplingMode::Flat,
                iters.into_boxed_slice(),
                manual.samples.into_boxed_slice(),
            );
        }

        // CodSpeed addition: this bench wasn't manual, so emit criterion's
        // usual warmup banner now (post-hoc but still before measurement).
        criterion
            .report
            .warmup(id, report_context, wu.as_nanos() as f64);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::Warmup {
                id: id.into(),
                nanos: wu.as_nanos() as f64,
            })
            .unwrap();
        }

        // Initial guess for the mean execution time
        let met = wu_elapsed as f64 / wu_iters as f64;

        if crate::debug_enabled() {
            println!(
                "\nCompleted {} iterations in {} nanoseconds, estimated execution time is {} ns",
                wu_iters, wu_elapsed, met
            );
        }

        let n = config.sample_size as u64;

        let actual_sampling_mode = config
            .sampling_mode
            .choose_sampling_mode(met, n, m_ns as f64);

        let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time);

        // Projected wall time of the measurement phase, based on the warm-up estimate.
        let expected_ns = m_iters
            .iter()
            .copied()
            .map(|count| count as f64 * met)
            .sum();

        // Use saturating_add to handle overflow.
        let total_iters = m_iters
            .iter()
            .fold(0u64, |acc, &count| acc.saturating_add(count));

        criterion
            .report
            .measurement_start(id, report_context, n, expected_ns, total_iters);

        if let Some(conn) = &criterion.connection {
            conn.send(&OutgoingMessage::MeasurementStart {
                id: id.into(),
                sample_count: n,
                estimate_ns: expected_ns,
                iter_count: total_iters,
            })
            .unwrap();
        }

        // Bracket the measured run with the instrumentation hooks. Their
        // results are deliberately ignored so a hook failure cannot abort the
        // benchmark.
        let m_elapsed = {
            let hooks = InstrumentHooks::instance();

            let _ = hooks.start_benchmark();
            let value = self.bench(measurement, &m_iters, parameter);
            let _ = hooks.stop_benchmark();

            value
        };
        let m_iters_f: Vec<f64> = m_iters.iter().map(|&x| x as f64).collect();

        (
            actual_sampling_mode,
            m_iters_f.into_boxed_slice(),
            m_elapsed.into_boxed_slice(),
        )
    }
}

/// Wraps a benchmark closure `F` so it can be driven through the `Routine`
/// trait.
///
/// The closure is called with a `Bencher` for measurement type `M` and a
/// borrowed parameter of type `T`.
pub struct Function<M: Measurement, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    // The user-supplied benchmark closure.
    f: F,
    // CodSpeed addition: stashed by `warm_up` when the user calls
    // `b.iter_manual_unstable*`. Drained via `take_codspeed_manual`.
    codspeed_manual: Option<ManualMeasurement>,
    // TODO: Is there some way to remove these?
    // Markers that tie the `T` and `M` type parameters to the struct; they
    // carry no data.
    _phantom: PhantomData<T>,
    _phamtom2: PhantomData<M>,
}
impl<M: Measurement, F, T> Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    /// Wraps the benchmark closure `f`. The new value starts out with no
    /// manual measurement recorded.
    pub fn new(f: F) -> Self {
        Self {
            f,
            codspeed_manual: None,
            _phantom: PhantomData,
            _phamtom2: PhantomData,
        }
    }
}

impl<M: Measurement, F, T> Routine<M, T> for Function<M, F, T>
where
    F: FnMut(&mut Bencher<'_, M>, &T),
    T: ?Sized,
{
    /// Calls the wrapped closure once per entry of `iters`, with that entry as
    /// the bencher's iteration count, and collects the measured value of each
    /// call converted to `f64`.
    ///
    /// Panics (via `assert_iterated`) if the closure returns without having
    /// iterated.
    fn bench(&mut self, m: &M, iters: &[u64], parameter: &T) -> Vec<f64> {
        // A single bencher is reused across all requested iteration counts.
        let mut bencher = Bencher {
            iterated: false,
            iters: 0,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
            codspeed_manual: None,
        };

        let mut measured = Vec::with_capacity(iters.len());
        for &count in iters {
            bencher.iters = count;
            (self.f)(&mut bencher, black_box(parameter));
            bencher.assert_iterated();
            measured.push(m.to_f64(&bencher.value));
        }
        measured
    }

    /// Calls the wrapped closure with a doubling iteration count (starting at
    /// one) until the accumulated elapsed time exceeds `how_long`.
    ///
    /// Returns `(elapsed_nanoseconds, total_iterations)`. If the closure
    /// recorded a manual measurement, it is stored on `self` and the function
    /// returns right after that call.
    fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
        let mut bencher = Bencher {
            iterated: false,
            iters: 1,
            value: m.zero(),
            measurement: m,
            elapsed_time: Duration::from_millis(0),
            codspeed_manual: None,
        };

        let mut iters_so_far = 0u64;
        let mut time_so_far = Duration::from_millis(0);
        loop {
            (self.f)(&mut bencher, black_box(parameter));
            bencher.assert_iterated();

            // CodSpeed addition: a closure that used `b.iter_manual_unstable*`
            // has already driven the whole benchmark. Pass what it captured on
            // to `sample()` and stop here rather than looping until `how_long`
            // has elapsed.
            let captured = bencher.codspeed_manual.take();
            if captured.is_some() {
                self.codspeed_manual = captured;
                let nanos = bencher.elapsed_time.as_nanos() as u64;
                return (nanos, bencher.iters.max(1));
            }

            iters_so_far += bencher.iters;
            time_so_far += bencher.elapsed_time;
            if time_so_far > how_long {
                return (time_so_far.as_nanos() as u64, iters_so_far);
            }

            // Double the workload for the next round.
            bencher.iters = bencher.iters.wrapping_mul(2);
        }
    }

    /// Hands out the manual measurement stored by `warm_up`, leaving `None`
    /// behind.
    fn take_codspeed_manual(&mut self) -> Option<ManualMeasurement> {
        std::mem::take(&mut self.codspeed_manual)
    }
}