bigoish 0.1.3 - Docs.rs

#[cfg(feature = "plots")]
use std::{io::IsTerminal, sync::LazyLock};

#[cfg(feature = "plots")]
use textplots::{Chart, ColorPlot, Shape};

use crate::KnownModels;
use crate::fit::{BestFit, find_best_fit};
use crate::measure::measure_complexity;
use crate::model::Model;

#[cfg(feature = "plots")]
static USE_PLOTS: LazyLock<bool> = LazyLock::new(|| {
    std::io::stdout().is_terminal()
        && std::env::var("NO_COLOR").is_err()
        && std::env::var("TERM").unwrap_or_default() != "dumb"
});

#[cfg(feature = "plots")]
/// Convert floats to ints.
fn to_ints(vals: &[f64]) -> impl Iterator<Item = i64> {
    vals.iter().map(|x| *x as i64)
}

/// The result of fitting a model.
#[cfg_attr(not(feature = "plots"), expect(dead_code))]
#[derive(Debug)]
struct FittingWithMeasurements {
    /// The result of fitting.
    best_fit: BestFit,
    /// Input lengths.
    xs: Vec<f64>,
    /// Corresponding time complexity for `xs`.
    ys: Vec<f64>,
}

/// Create a series of size/input pairs suitable for [`assert_best_fit`].
///
/// Every 10 inputs will result in input sizes that are ten times larger, to
/// ensure fitting the models across multiple magnitudes.
///
/// ```rust
/// # use bigoish::growing_inputs;
/// let input_sizes_and_values = growing_inputs(
///     10,
///     |n| vec![0u64; n],
///     5
/// );
/// assert_eq!(
///        input_sizes_and_values.into_iter().collect::<Vec<_>>(),
///        vec![
///            (10, vec![0u64; 10]),
///            (13, vec![0; 13]),
///            (16, vec![0; 16]),
///            (20, vec![0; 20]),
///            (25, vec![0; 25]),
///        ],
/// )
/// ```
///
/// Parameters:
///
/// * `initial_input_size`: The smallest input size to use.
/// * `create_input`: A function or closure that converts an input size into an input.
/// * `num_inputs`: The number of inputs to create in the resulting `Iterator`.
pub fn growing_inputs<C: 'static + Fn(usize) -> T, T: Clone>(
    initial_input_size: usize,
    create_input: C,
    num_inputs: usize,
) -> impl IntoIterator<Item = (usize, T)> {
    // Grow 10× after 10 inputs:
    let growth_factor = 10.0f64.powf(0.1);
    (0..num_inputs).map(move |i: usize| {
        let len = ((initial_input_size as f64) * (growth_factor.powi(i as i32))).round() as usize;
        (len, (create_input)(len))
    })
}

/// Measure a function's complexity, and then try to find the best fit given
/// known models.
///
/// See [assert_best_fit] for documentation of parameters.
fn measure_and_find_better_fit<M, F, I, T, R>(
    expected_model: M,
    function: F,
    input_sizes_and_values: I,
    known_models: KnownModels,
) -> FittingWithMeasurements
where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    let measurements = measure_complexity(function, input_sizes_and_values.into_iter());
    let xs = measurements.xs;
    let ys = measurements.ys;
    let best_fit = find_best_fit(expected_model, &xs, &ys, known_models);
    FittingWithMeasurements { best_fit, xs, ys }
}

/// Assert the given computational complexity model is the best fit for the given function's scalability, compared against a reasonable default set of [`Model`]s.
///
/// The function tries to fit pairs of `(input size, measured runtime of function(input))` to the given model, as well as to additional default models, and asserts that the best fit is the [`Model`] you passed in.
///
/// The default set of comparison [`Model`]s come from [`KnownModels::default()`].
/// If you wish to add more [`Model`]s to compare against, use [`assert_best_fit_vs_models`].
///
/// The more inputs, and the larger they are, the better; you'll also want the input sizes to span multiple orders of magnitude.
/// Larger inputs will make test run more slowly, however.
///
/// Parameters:
/// * `expected_model`: The expected computational complexity model.
/// * `function`: The function whose computational complexity you are testing.
/// * `input_sizes_and_values`: An `IntoIterator` of pairs of `(input size, input)`.
///   The `function` parameter will be repeatedly called with these inputs.
///   You can use [`growing_inputs`] to succinctly generate these pairs.
///
/// ## Panics
///
/// If the best-fitting model is not the expected one.
pub fn assert_best_fit<M, F, T, R, I>(expected_model: M, function: F, input_sizes_and_values: I)
where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    assert_best_fit_vs_models(
        expected_model,
        function,
        input_sizes_and_values,
        KnownModels::default(),
    );
}

/// Like [`assert_best_fit`], but allows you to override the set of known models to compare against.
pub fn assert_best_fit_vs_models<M, F, T, R, I>(
    expected_model: M,
    function: F,
    input_sizes_and_values: I,
    known_models: KnownModels,
) where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    let expected_string = expected_model.to_string();
    let maybe_better = measure_and_find_better_fit(
        expected_model,
        function,
        input_sizes_and_values,
        known_models,
    );
    let (not_found, better_fitting) = match maybe_better.best_fit {
        BestFit::ExpectedModel => return,
        BestFit::AnotherKnownModel { best_known } => (false, best_known),
        BestFit::NotFound { best_known } => (true, best_known),
    };
    let best_model = &better_fitting.model;
    #[cfg(feature = "plots")]
    if *USE_PLOTS {
        let xs = &maybe_better.xs;
        let ys = &maybe_better.ys;
        println!(
            "\nRed dots: real data\nBlue line: best fitted model {}",
            best_model.to_string()
        );
        let points = xs
            .iter()
            .zip(ys.iter())
            .map(|(x, y)| (*x as f32, *y as f32))
            .collect::<Vec<(f32, f32)>>();
        let combo_results = to_ints(ys)
            .chain(
                xs.iter()
                    .map(|x| better_fitting.scaled_complexity(*x) as i64),
            )
            .collect::<Vec<_>>();
        let mut chart = Chart::new_with_y_range(
            120,
            60,
            to_ints(xs).min().unwrap() as f32 * 0.9,
            to_ints(xs).max().unwrap() as f32 * 1.1,
            *combo_results.iter().min().unwrap() as f32 * 0.9,
            *combo_results.iter().max().unwrap() as f32 * 1.1,
        );
        chart.borders();
        chart
            .linecolorplot(
                &Shape::Continuous(Box::new(|n| {
                    better_fitting.scaled_complexity(n as f64) as f32
                })),
                rgb::RGB8 { r: 0, g: 0, b: 255 },
            )
            .linecolorplot(&Shape::Points(&points), rgb::RGB8 { r: 255, g: 0, b: 0 })
            .display();
    }
    panic!(
        "The best known matching model is: {}{}\nThis didn't match the expected model: {}",
        best_model.to_string(),
        if not_found {
            "\nHowever, even this model failed validation and is not a sufficient fit; the real complexity model appears to be even worse!"
        } else {
            ""
        },
        expected_string,
    );
}

#[cfg(test)]
mod tests {
    use std::{hint::black_box, iter::repeat_with};

    use super::*;
    use crate::{
        BoxedModel,
        model::{Constant, Log, N},
    };

    fn make_vec(n: usize) -> Vec<i64> {
        repeat_with(|| fastrand::i64(..)).take(n).collect()
    }

    fn sort(mut v: Vec<i64>) -> Vec<i64> {
        v.sort();
        v
    }

    /// Make sure sorting has the expected complexity: n*logn.
    #[test]
    fn check_expected_complexity_sort() {
        check_expected_complexity_on_vecs(sort, N * Log(N), 200, 25);
        assert_best_fit(N * Log(N), sort, growing_inputs(100, make_vec, 25));
    }

    /// Make sure searching a vector has the expected complexity: n.
    #[test]
    fn check_expected_complexity_search() {
        let search = |v: Vec<i64>| {
            for _ in 0..100 {
                black_box(black_box(&v).iter().position(|i| i == &123));
            }
        };
        check_expected_complexity_on_vecs(search, N, 100, 20);
        assert_best_fit(N, search, growing_inputs(100, make_vec, 20));
    }

    /// For a function with known complexity that runs on a Vec<i64>, make sure
    /// we get the expected complexity.
    fn check_expected_complexity_on_vecs<R, F: Fn(Vec<i64>) -> R, M: Model + 'static>(
        function: F,
        expected_complexity: M,
        start_size: usize,
        num_items: usize,
    ) {
        let BestFit::AnotherKnownModel { best_known } = measure_and_find_better_fit(
            Constant,
            function,
            growing_inputs(start_size, make_vec, num_items),
            KnownModels::default(),
        )
        .best_fit
        else {
            panic!("wrong result")
        };
        let expected_complexity = BoxedModel::new(expected_complexity);
        assert!(
            best_known.model == expected_complexity,
            "{:?} != {}",
            best_known.model,
            expected_complexity.to_string()
        );
    }

    #[should_panic(
        expected = "The best known matching model is: n*log(n)\nThis didn't match the expected model: n"
    )]
    #[test]
    fn assert_panics_on_wrong_options() {
        assert_best_fit(N, sort, growing_inputs(200, make_vec, 25));
    }

    #[should_panic(
        expected = "The best known matching model is: n*log(n)\nThis didn't match the expected model: n"
    )]
    #[test]
    fn assert_panics_on_wrong_options_with_models() {
        assert_best_fit_vs_models(
            N,
            sort,
            growing_inputs(200, make_vec, 25),
            KnownModels::default(),
        );
    }

    /// If an empty `KnownModels` is given, the given `Model` is the best by
    /// default, but shouldn't validate.
    #[should_panic(expected = "is not a sufficient fit")]
    #[test]
    fn best_we_can_do() {
        assert_best_fit_vs_models(
            N,
            sort,
            growing_inputs(200, make_vec, 25),
            KnownModels::new(),
        );
    }
}