bigoish 0.1.1

Test the computational complexity (big-O) of Rust algorithms
Documentation
//! Measure time complexity of functions.

use std::io::{Error as IoError, ErrorKind as IoErrorKind, Result as IoResult};
use std::time::Duration;
use std::{hint::black_box, sync::LazyLock};

#[cfg(target_os = "linux")]
use perf_event::{Builder, Group, events::Hardware};

use cpu_time::ThreadTime;

#[cfg(not(target_os = "linux"))]
fn use_instruction_count() -> bool {
    false
}

#[cfg(target_os = "linux")]
fn use_instruction_count() -> bool {
    if std::env::var("BIGOISH_TIME_ONLY").unwrap_or_default() == "1" {
        return false;
    }
    fn spin(_n: usize) {
        let start = std::time::Instant::now();
        while start.elapsed().as_nanos() < 500_000 {}
    }
    match measure_cpu_instructions(&spin, 1) {
        Ok(insts) if insts > 0.0 => {
            std::thread::sleep(Duration::from_millis(10));
            true
        }
        _ => false,
    }
}

static USE_INSTRUCTION_COUNT: LazyLock<bool> = LazyLock::new(use_instruction_count);

/// Input sizes and corresponding time complexity.
pub(crate) struct Measurements {
    pub xs: Vec<f64>,
    pub ys: Vec<f64>,
    pub method: Method,
}

/// The method used to measure complexity.
#[derive(PartialEq, Debug)]
pub(crate) enum Method {
    Time,
    Instructions,
}

/// Given a function, a set of inputs, and a way to measure input length, return
/// the cost (time complexity) of running the function.
///
/// * On Windows and macOS, the cost is elapsed CPU time in nanoseconds.
/// * On Linux, CPU instruction count is used if possible using
///   `perf_event_open()` syscall. When this is not possible, for example GitHub
///   Actions doesn't support it, falls back to elapsed CPU time in nanoseconds.
pub(crate) fn measure_complexity<T, R, F, I>(function: F, inputs: I) -> Measurements
where
    F: Fn(T) -> R,
    I: Iterator<Item = (usize, T)>,
    T: Clone,
{
    let mut result = Measurements {
        xs: vec![],
        ys: vec![],
        method: Method::Time,
    };

    for (len, input) in inputs {
        if *USE_INSTRUCTION_COUNT {
            result.method = Method::Instructions;
            // Maybe could handle this more gracefully by restarting and doing
            // time measurement, but given we check it in USE_INSTRUCTION_COUNT
            // chances are if it worked then it'll work now.
            let instructions = measure_cpu_instructions(&function, input)
                .expect("Unexpected error getting CPU instruction count");
            result.xs.push(len as f64);
            result.ys.push(instructions);
        } else if let Some(nanos) = measure_time(&function, input) {
            result.xs.push(len as f64);
            result.ys.push(nanos);
        }
    }
    result
}

/// Run the function multiple times, return mean elapsed nanoseconds.
pub(crate) fn measure_time<T, R, F>(function: &F, input: T) -> Option<f64>
where
    F: Fn(T) -> R,
    T: Clone,
{
    const ITERATIONS: usize = 20;
    let inputs = vec![input; ITERATIONS];
    let start = ThreadTime::now();
    for input in inputs {
        black_box(function(black_box(input)));
    }
    let nanos = start.elapsed().as_nanos();
    let result = (nanos as f64) / ITERATIONS as f64;
    if result < 5000. {
        eprintln!(
            "Skipping input with CPU time measurement of {result} nanoseconds, \
             as it is less than 5000 nanoseconds."
        );
        return None;
    }
    Some(result)
}

#[cfg(target_os = "linux")]
/// Run the function multiple times, return CPU instruction count.
///
/// The result only makes sense if the current thread is pinned to a single CPU.
fn measure_cpu_instructions<T, R, F>(function: &F, input: T) -> IoResult<f64>
where
    F: Fn(T) -> R,
    T: Clone,
{
    // If there are insufficient hardware counters available, the counter
    // might be used only part of the time. In other cases the counter won't
    // be used at all. So spin until real data is available.
    let mut measurements = vec![];
    for _ in 0..100 {
        let new_input = input.clone();
        let mut group = Group::new()?;
        let mut instructions = Builder::new()
            .group(&mut group)
            .kind(Hardware::INSTRUCTIONS)
            .observe_pid(unsafe { libc::gettid() } as i32)
            .build()?; //
        group.enable()?;
        black_box(function(black_box(new_input)));
        let data = instructions.read_count_and_time()?;
        group.disable()?;
        if data.time_enabled == data.time_running {
            measurements.push(data.count);
            if measurements.len() == 21 {
                break;
            }
        } else if data.time_running == 0 {
            // Didn't work at all. Try to allow time to pass and maybe that'll
            // free something up; maybe we'll switch cores, maybe whatever
            // kernel/hardware/??? state will get cleared, something.
            // Empirically this helps prevent infinite number of 0 reads.
            drop(instructions);
            drop(group);
            std::thread::sleep(Duration::from_millis(100));
        };
    }
    if let Some(max_count) = measurements.iter().max() {
        Ok(*max_count as f64)
    } else {
        // TODO once there is way to fallback to time measurements with env
        // variable, suggest that?
        Err(IoError::new(
            IoErrorKind::TimedOut,
            "Failed to get any meaningful CPU instruction measurements",
        ))
    }
}

#[cfg(not(target_os = "linux"))]
/// Run the function multiple times, return CPU instruction count.
///
/// The result only makes sense if the current thread is pinned to a single CPU.
fn measure_cpu_instructions<T, R, F>(function: &F, input: T) -> IoResult<f64>
where
    F: Fn(T) -> R,
    T: Clone,
{
    Err(std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "Only supported on Linux",
    ))
}

#[cfg(test)]
mod tests {
    use std::time::{Duration, Instant};

    use super::*;

    /// Assert the two values are within a given fraction of each other.
    macro_rules! assert_close {
        ($a:expr,$b:expr,$fraction:literal) => {{
            let a = $a;
            let b = $b;
            assert!(
                (1.0 - a / b).abs() < $fraction,
                "{a} is more than {}% away from {b}",
                $fraction * 100.0
            );
        }};
    }

    /// Spin for given number of nanoseconds.
    fn spin_nanos(n: usize) {
        let start = Instant::now();
        while start.elapsed().as_nanos() < n as u128 {}
    }

    /// measure_time() aproximates elapsed time in nanoseconds.
    #[test]
    fn measuring_time() {
        let elapsed = measure_time(&spin_nanos, 10_000);
        assert_close!(elapsed.unwrap(), 10_000., 0.1);
        assert_eq!(measure_time(&spin_nanos, 100), None);
    }

    /// measure_complexity() returns input lengths as `xs`, and a measure the
    /// scales with CPU time as `ys`.
    #[test]
    fn measuring_complexity() {
        let inputs = [(500usize, 50u64), (5000, 500), (50_000, 5000)];
        let function = |n| spin_nanos((n * 10_000) as usize);
        let measurements = measure_complexity(function, inputs.into_iter());
        assert_eq!(&measurements.xs, &[500., 5000., 50_000.]);
        for i in 1i32..3 {
            assert_close!(
                measurements.ys[i as usize],
                measurements.ys[0] * 10f64.powi(i),
                0.75
            );
        }

        assert_eq!(
            measurements.method,
            if *USE_INSTRUCTION_COUNT {
                Method::Instructions
            } else {
                Method::Time
            }
        );
    }

    /// measure_complexity() doesn't count sleeps.
    #[test]
    fn measure_complexity_cpu_only() {
        fn spin_and_sleep(n: usize) {
            if n == 0 {
                spin_nanos(1_000_000);
            } else {
                spin_nanos(5_000_000);
                std::thread::sleep(Duration::from_nanos(5_000_000));
                spin_nanos(5_000_000);
            }
        }
        let inputs = [(1, 1), (2, 2)];
        let measurements = measure_complexity(spin_and_sleep, inputs.into_iter());
        assert_close!(measurements.ys[0], measurements.ys[1], 0.1);
    }
}