//! bigoish 0.1.1
//!
//! Test the computational complexity (big-O) of Rust algorithms.
//! See the crate documentation for details.
use std::{io::IsTerminal, sync::LazyLock};

use textplots::{Chart, ColorPlot, Shape};

use crate::KnownModels;
use crate::measure::measure_complexity;
use crate::model::{BoxedModel, Model};

/// Whether to draw colored charts on panic: only when stdout is a terminal,
/// the `NO_COLOR` environment variable is unset, and `TERM` is not "dumb".
static USE_COLORS: LazyLock<bool> = LazyLock::new(|| {
    if !std::io::stdout().is_terminal() {
        return false;
    }
    if std::env::var("NO_COLOR").is_ok() {
        return false;
    }
    std::env::var("TERM").unwrap_or_default() != "dumb"
});

/// Convert floats to ints, truncating toward zero.
///
/// The returned iterator borrows `vals`; the explicit `+ '_` makes that
/// lifetime capture valid on Rust editions before 2024 as well.
fn to_ints(vals: &[f64]) -> impl Iterator<Item = i64> + '_ {
    vals.iter().copied().map(|x| x as i64)
}

/// The result of fitting a model, together with the measurements it was
/// fitted against.
struct FittingWithMeasurements {
    /// The scaling parameters: an additive offset, plus (for non-constant
    /// models) a multiplicative factor applied to the model's complexity.
    pub scaling_params: Vec<f64>,
    /// The best model, to be scaled by the `scaling_params`.
    model: BoxedModel,
    /// Input lengths.
    xs: Vec<f64>,
    /// Corresponding time complexity for `xs`.
    ys: Vec<f64>,
}

/// Create a series of size/input pairs suitable for [`assert_best_fit`].
///
/// Each step multiplies the input size by 10^(1/10), so every 10 inputs span
/// one order of magnitude — useful for fitting models across multiple
/// magnitudes.
///
/// ```rust
/// # use bigoish::growing_inputs;
/// let input_sizes_and_values = growing_inputs(
///     10,
///     |n| vec![0u64; n],
///     5
/// );
/// assert_eq!(
///        input_sizes_and_values.into_iter().collect::<Vec<_>>(),
///        vec![
///            (10, vec![0u64; 10]),
///            (13, vec![0; 13]),
///            (16, vec![0; 16]),
///            (20, vec![0; 20]),
///            (25, vec![0; 25]),
///        ],
/// )
/// ```
///
/// Parameters:
///
/// * `initial_input_size`: The smallest input size to use.
/// * `create_input`: A function or closure that converts an input size into an input.
/// * `num_inputs`: The number of inputs to create in the resulting `Iterator`.
pub fn growing_inputs<C: 'static + Fn(usize) -> T, T: Clone>(
    initial_input_size: usize,
    create_input: C,
    num_inputs: usize,
) -> impl IntoIterator<Item = (usize, T)> {
    // 10^(1/10): ten steps multiply the size by ten.
    let step = 10.0f64.powf(0.1);
    (0..num_inputs).map(move |index| {
        let size = (initial_input_size as f64 * step.powi(index as i32)).round() as usize;
        (size, create_input(size))
    })
}

/// Check if there's a better fit in known default models.
///
/// Returns `None` when the given model was the best fit; otherwise returns
/// the better-fitting model together with the raw measurements.
///
/// See [assert_best_fit] for documentation of parameters.
fn find_better_fit<M, F, I, T, R>(
    expected_model: M,
    function: F,
    input_sizes_and_values: I,
    known_models: KnownModels,
) -> Option<FittingWithMeasurements>
where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    // Time the function across all inputs, then try to beat the expected model.
    let measurements = measure_complexity(function, input_sizes_and_values.into_iter());
    let xs = measurements.xs;
    let ys = measurements.ys;
    let fit = crate::fit::find_better_fit(expected_model, &xs, &ys, known_models)?;
    Some(FittingWithMeasurements {
        scaling_params: fit.scaling_params,
        model: fit.model,
        xs,
        ys,
    })
}

/// Assert that `expected_model` is the best-fitting computational complexity model for the given function's scalability, compared against a reasonable default set of [`Model`]s.
///
/// Pairs of `(input size, measured runtime of function(input))` are fitted to the
/// given model and to the default comparison models; the assertion holds only when
/// the [`Model`] you passed in is the best fit.
///
/// The default comparison set comes from [`KnownModels::default()`].
/// To compare against additional [`Model`]s, use [`assert_best_fit_vs_models`].
///
/// More inputs, larger inputs, and input sizes spanning multiple orders of
/// magnitude all improve the fit — at the cost of a slower test run.
///
/// Parameters:
/// * `expected_model`: The expected computational complexity model.
/// * `function`: The function whose computational complexity you are testing.
/// * `input_sizes_and_values`: An `IntoIterator` of pairs of `(input size, input)`.
///   The `function` parameter will be repeatedly called with these inputs.
///   You can use [`growing_inputs`] to succinctly generate these pairs.
///
/// ## Panics
///
/// If the best-fitting model is not the expected one.
pub fn assert_best_fit<M, F, T, R, I>(expected_model: M, function: F, input_sizes_and_values: I)
where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    let known_models = KnownModels::default();
    assert_best_fit_vs_models(expected_model, function, input_sizes_and_values, known_models);
}

/// Like [`assert_best_fit`], but allows you to override the set of known models to compare against.
pub fn assert_best_fit_vs_models<M, F, T, R, I>(
    expected_model: M,
    function: F,
    input_sizes_and_values: I,
    known_models: KnownModels,
) where
    M: Model + 'static,
    F: Fn(T) -> R,
    I: IntoIterator<Item = (usize, T)>,
    T: Clone,
{
    let expected_string = expected_model.to_string();
    let maybe_better = find_better_fit(
        expected_model,
        function,
        input_sizes_and_values,
        known_models,
    );
    if let Some(better_fitting) = maybe_better {
        let best_model = better_fitting.model;
        let xs = &better_fitting.xs;
        let ys = &better_fitting.ys;
        if *USE_COLORS {
            println!(
                "\nRed dots: real data\nBlue line: best fitted model {}",
                best_model.to_string()
            );
            let params = &better_fitting.scaling_params;
            let calc_complexity = |n: f64| -> f64 {
                if best_model.constant() {
                    params[0]
                } else {
                    params[0] + params[1] * best_model.complexity(n)
                }
            };
            let points = xs
                .iter()
                .zip(ys.iter())
                .map(|(x, y)| (*x as f32, *y as f32))
                .collect::<Vec<(f32, f32)>>();
            let combo_results = to_ints(ys)
                .chain(xs.iter().map(|x| calc_complexity(*x) as i64))
                .collect::<Vec<_>>();
            let mut chart = Chart::new_with_y_range(
                120,
                60,
                to_ints(xs).min().unwrap() as f32 * 0.9,
                to_ints(xs).max().unwrap() as f32 * 1.1,
                *combo_results.iter().min().unwrap() as f32 * 0.9,
                *combo_results.iter().max().unwrap() as f32 * 1.1,
            );
            chart.borders();
            chart
                .linecolorplot(
                    &Shape::Continuous(Box::new(|n| calc_complexity(n as f64) as f32)),
                    rgb::RGB8 { r: 0, g: 0, b: 255 },
                )
                .linecolorplot(&Shape::Points(&points), rgb::RGB8 { r: 255, g: 0, b: 0 })
                .display();
        }
        panic!(
            "The best matching model is: {}\nThis didn't match the expected model: {}",
            best_model.to_string(),
            expected_string
        );
    }
}

#[cfg(test)]
mod tests {
    use std::iter::repeat_with;

    use super::*;
    use crate::model::{Constant, Log, N};

    /// Create a vector of `n` random integers.
    fn make_vec(n: usize) -> Vec<i64> {
        repeat_with(|| fastrand::i64(..)).take(n).collect()
    }

    /// Sort a vector in place and hand it back.
    fn sort(mut values: Vec<i64>) -> Vec<i64> {
        values.sort();
        values
    }

    /// Sorting should fit n*log(n), via both the internal helper and the
    /// public API.
    #[test]
    fn check_expected_complexity_sort() {
        check_expected_complexity_on_vecs(sort, N * Log(N), 25);
        assert_best_fit(N * Log(N), sort, growing_inputs(100, make_vec, 25));
    }

    /// Linear search over a vector should fit n.
    #[test]
    fn check_expected_complexity_search() {
        check_expected_complexity_on_vecs(|v| v.into_iter().position(|i| i == 123), N, 40);
    }

    /// Run `function` on random vectors of growing sizes and assert that the
    /// estimated complexity matches `expected_complexity`.
    fn check_expected_complexity_on_vecs<R, F: Fn(Vec<i64>) -> R, M: Model + 'static>(
        function: F,
        expected_complexity: M,
        num_items: usize,
    ) {
        // Starting from `Constant`, a `None` result would mean O(1) won.
        let maybe_fit = find_better_fit(
            Constant,
            function,
            growing_inputs(100, make_vec, num_items),
            KnownModels::default(),
        );
        let Some(fit) = maybe_fit else {
            panic!(
                "Unexpectedly estimated constant complexity, i.e. O(1), instead of {}",
                expected_complexity.to_string()
            )
        };
        let expected_complexity = BoxedModel::new(expected_complexity);
        assert!(
            fit.model == expected_complexity,
            "{:?} != {}",
            fit.model,
            expected_complexity.to_string()
        );
    }

    /// Passing the wrong expected model must panic with a clear message.
    #[should_panic(
        expected = "The best matching model is: n*log(n)\nThis didn't match the expected model: n"
    )]
    #[test]
    fn assert_panics_on_wrong_options() {
        assert_best_fit(N, sort, growing_inputs(100, make_vec, 25));
    }

    /// Same as above, via the `_vs_models` entry point.
    #[should_panic(
        expected = "The best matching model is: n*log(n)\nThis didn't match the expected model: n"
    )]
    #[test]
    fn assert_panics_on_wrong_options_with_models() {
        assert_best_fit_vs_models(
            N,
            sort,
            growing_inputs(100, make_vec, 25),
            KnownModels::default(),
        );
    }

    /// If an empty `KnownModels` is given, the given `Model` is the best by
    /// default.
    #[test]
    fn best_we_can_do() {
        assert_best_fit_vs_models(
            N,
            sort,
            growing_inputs(100, make_vec, 25),
            KnownModels::new(),
        );
    }
}