// bigoish 0.1.1 — test the computational complexity (big-O) of Rust algorithms.
//! Fit `(x, y)` pairs to a complexity model.

use rmpfit::{MPError, MPFitter, MPPar, MPResult};

use crate::model::*;

/// The real results that we're trying to fit a model to.
///
/// Implements `MPFitter` (below) so that `rmpfit` can minimise the residuals
/// between the measured costs `ys` and the model's predictions at `xs`.
struct FitToResults<'a> {
    /// The model we're trying to fit:
    pub model: &'a dyn Model,
    /// Sizes of inputs to the real function.
    xs: &'a [f64],
    /// For each input size in `xs`, the corresponding cost (elapsed time, or
    /// CPU instructions, or some other cost measure) of calculating the result.
    ys: &'a [f64],
    /// The parameter bounds for fitting: one `MPPar` per fitted parameter
    /// (see `fit`, which constrains every parameter to be non-negative).
    param_bounds: Vec<MPPar>,
}

impl MPFitter for FitToResults<'_> {
    fn eval(&mut self, params: &[f64], deviates: &mut [f64]) -> MPResult<()> {
        for ((d, x), y) in deviates.iter_mut().zip(self.xs.iter()).zip(self.ys.iter()) {
            let f = if self.model.constant() {
                params[0]
            } else {
                params[0] + params[1] * self.model.complexity(*x)
            };
            // We could divide by a sigma, but we expect large values
            // (nanoseconds, CPU instructions), so no need to scale them up.
            *d = *y - f;
        }
        Ok(())
    }

    fn number_of_points(&self) -> usize {
        self.xs.len()
    }

    fn parameters(&self) -> Option<&[rmpfit::MPPar]> {
        Some(&self.param_bounds)
    }
}

#[derive(Debug)]
pub struct FittingError {
    error: MPError,
}

impl std::fmt::Display for FittingError {
    /// Delegate formatting to the wrapped `MPError`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.error, f)
    }
}

/// Calculate the Akaike information criterion (AIC); lower is better.
///
/// Uses the least-squares form `n * ln(RSS / n) + 2k`, where `k` is the
/// number of fitted parameters, `n` the number of data points, and `RSS`
/// the residual sum of squares.
fn aic(num_parameters: usize, num_points: usize, rss: f64) -> f64 {
    let n = num_points as f64;
    let k = num_parameters as f64;
    n * (rss / n).ln() + 2.0 * k
}

/// The result of fitting a model.
pub(crate) struct Fitting {
    /// The scaling parameters: `[a]` for a constant model, `[a, b]` for any
    /// other model (see `fit`).
    pub scaling_params: Vec<f64>,
    /// The AIC; lower means better fit.
    pub aic: f64,
    /// The model used for the fitting.
    pub model: BoxedModel,
}

/// Fit a model to matched real inputs and outputs.
///
/// The parameters that are being fit are `a`, `b`, and `extra_params`, for the
/// function `a + b * model.complexity(x, extra_params)` with the exception that
/// for `Constant` it's just matched to a single parameter `a`). `a` and `b`
/// will be stored in the result's `scaling_params`.
fn fit(model: BoxedModel, xs: &[f64], ys: &[f64]) -> Result<Fitting, FittingError> {
    assert_eq!(xs.len(), ys.len());
    let mut fitter = FitToResults {
        model: &model as &dyn Model,
        xs,
        ys,
        param_bounds: vec![],
    };
    // Constant models fit a single offset `a`; everything else fits `a` and
    // a scale `b`.
    let num_params = if fitter.model.constant() { 1 } else { 2 };
    // Constrain every parameter to be non-negative: negative offsets or
    // scales are not meaningful for cost measurements.
    fitter.param_bounds = (0..num_params)
        .map(|_| MPPar {
            limited_low: true,
            limit_low: 0.0,
            ..Default::default()
        })
        .collect();

    // Start all parameters at 1.0; the fit refines them in place.
    let mut params = vec![1.0; num_params];
    match fitter.mpfit(&mut params) {
        Err(error) => Err(FittingError { error }),
        Ok(status) => {
            // Residual sum of squares, for the AIC.
            let rss: f64 = status.resid.iter().map(|r| r * r).sum();
            Ok(Fitting {
                scaling_params: params,
                aic: aic(num_params, xs.len(), rss),
                model,
            })
        }
    }
}

/// Check if there's a better fit in known default models.
///
/// `None` implies the given model was the best.
pub(crate) fn find_better_fit<M: Model + 'static>(
    expected_model: M,
    xs: &[f64],
    ys: &[f64],
    known_models: KnownModels,
) -> Option<Fitting> {
    let baseline =
        fit(BoxedModel::new(expected_model), xs, ys).expect("Failed to fit to the model.");
    // Holds the expected model's fitting once any candidate beats it; `None`
    // means the expected model is still the best so far.
    let mut displaced: Option<Fitting> = None;
    let mut best = baseline;
    for candidate in known_models.into_iter() {
        let fitting = match fit(candidate, xs, ys) {
            Ok(fitting) => fitting,
            // A candidate that fails to fit is simply skipped.
            Err(_) => continue,
        };
        if fitting.aic < best.aic {
            // The first time a candidate wins, stash the original fitting so
            // we can compare models at the end.
            if displaced.is_none() {
                displaced = Some(best);
            }
            best = fitting;
        }
    }
    // Only report a better fit when the winner's model actually differs from
    // the expected one (a candidate may use the same model).
    match displaced {
        Some(original) if original.model != best.model => Some(best),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// If there is (somehow) a downslope, best we can do is constant model.
    #[test]
    fn downslope_disallowed() {
        let sizes = [10., 20., 30., 40.];
        let costs = [100., 90., 80., 70.];
        assert!(find_better_fit(Constant, &sizes, &costs, KnownModels::default()).is_none());
    }

    /// Vaguely constant data results in Constant model.
    #[test]
    fn constant_matching() {
        let sizes = [10., 20., 30., 40.];
        let costs = [100., 90., 80., 110.];
        assert!(find_better_fit(Constant, &sizes, &costs, KnownModels::default()).is_none());
    }

    /// Data generated from a model (plus a small alternating multiplicative
    /// error) should be matched back to that same model, for a couple of
    /// different scales of `a` and `b`.
    #[rstest]
    #[case(N)]
    #[case(Log(N))]
    #[case(Sqrt(N))]
    #[case(N * Log(N))]
    #[case(Log(Log(N)))]
    #[case(N2)]
    #[case(N3)]
    fn model_output_fits_itself<M: Model + PartialEq + Clone + 'static>(#[case] model: M) {
        let sizes = [
            100.0, 140., 200., 300., 500., 700., 1000.0, 1400., 2000., 3000., 5000., 7000.,
            10_000.0f64,
        ];
        // Alternate the error factor so the data never fits exactly.
        let noise = [1.0001, 0.999].iter().cycle();
        for (a, b) in [(50., 2.), (500_000., 1_000_000.)] {
            let costs: Vec<f64> = sizes
                .iter()
                .zip(noise.clone())
                .map(|(size, err)| (a + b * model.complexity(*size)) * err)
                .collect();
            assert!(
                find_better_fit(model.clone(), &sizes, &costs, KnownModels::default()).is_none()
            );
        }
    }
}