#![deny(missing_docs)]
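//! Regression: fit models (linear, polynomial, power, exponential, logistic, and
//! trigonometric) to data, using OLS, Theil-Sen, spiral optimization, gradient
//! descent, or binary search as the estimation backend.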
use std::fmt::{self, Display};
use std::ops::Deref;
#[doc(inline)]
pub use models::*;
pub use binary_search::Options as BinarySearchOptions;
#[cfg(feature = "ols")]
pub use derived::{exponential_ols, power_ols};
pub use gradient_descent::{
ParallelOptions as GradientDescentParallelOptions,
SimultaneousOptions as GradientDescentSimultaneousOptions,
};
#[cfg(feature = "ols")]
pub use ols::OlsEstimator;
pub use spiral::{SpiralLinear, SpiralLogisticWithCeiling};
pub use theil_sen::{LinearTheilSen, PolynomialTheilSen};
trait Model: Predictive + Display {}
impl<T: Predictive + Display> Model for T {}
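/// A model behind a `Box`, for dynamic dispatch over any [`Predictive`] + [`Display`] type.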
pub struct DynModel {
model: Box<dyn Model>,
}
impl DynModel {
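    /// Wraps `model` in a box.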
pub fn new(model: impl Predictive + Display + 'static) -> Self {
Self {
model: Box::new(model),
}
}
}
impl Predictive for DynModel {
fn predict_outcome(&self, predictor: f64) -> f64 {
self.model.predict_outcome(predictor)
}
}
impl Display for DynModel {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.model.fmt(f)
}
}
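/// A model which can predict an outcome from a predictor.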
pub trait Predictive {
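    /// Predicts the outcome of `predictor`.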
fn predict_outcome(&self, predictor: f64) -> f64;
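    /// Puts `self` in a [`DynModel`] box for dynamic dispatch.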
fn boxed(self) -> DynModel
where
Self: Sized + Display + 'static,
{
DynModel::new(self)
}
}
impl<T: Predictive + ?Sized> Predictive for &T {
fn predict_outcome(&self, predictor: f64) -> f64 {
(**self).predict_outcome(predictor)
}
}
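/// Calculating the coefficient of determination (R²) of a model's predictions.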
pub trait Determination: Predictive {
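    /// Calculates R² from iterators of predictors and outcomes with `len` items each:
    ///
    /// `R² = 1 - Σ(yᵢ - ŷᵢ)² / Σ(yᵢ - ȳ)²`
    ///
    /// where `ŷᵢ` is the prediction for the i-th predictor and `ȳ` is the mean of the
    /// outcomes. If the quotient is `NaN` (e.g. all outcomes are equal), `1.0` is returned.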
fn determination(
&self,
predictors: impl Iterator<Item = f64>,
outcomes: impl Iterator<Item = f64> + Clone,
len: usize,
) -> f64 {
let outcomes_mean = outcomes.clone().sum::<f64>() / len as f64;
let residuals = predictors
.zip(outcomes.clone())
.map(|(pred, out)| out - self.predict_outcome(pred));
let res: f64 = residuals.map(|residual| residual * residual).sum();
let tot: f64 = outcomes
.map(|out| {
let diff = out - outcomes_mean;
diff * diff
})
.sum();
let mut diff = res / tot;
if diff.is_nan() {
diff = 0.
};
1.0 - diff
}
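    /// Convenience wrapper around [`Determination::determination`] for slices.
    ///
    /// # Panics
    ///
    /// Panics if `predictors` and `outcomes` have different lengths.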
fn determination_slice(&self, predictors: &[f64], outcomes: &[f64]) -> f64 {
assert_eq!(
predictors.len(),
outcomes.len(),
"predictors and outcomes must have the same number of items"
);
Determination::determination(
self,
predictors.iter().cloned(),
outcomes.iter().cloned(),
predictors.len(),
)
}
}
impl<T: Predictive> Determination for T {}
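/// The models (functions) which can be fitted to data, and the estimator traits
/// implemented by the backends.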
pub mod models {
use super::*;
use std::f64::consts::E;
pub use trig::*;
macro_rules! estimator {
($(
$(#[$docs:meta])*
$name:ident -> $item:ty,
$($(#[$more_docs:meta])* ($($arg:ident: $ty:ty),*),)?
$model:ident, $box:ident
)+) => {
$(
$(#[$docs])*
pub trait $name {
                    #[doc = concat!("Model the [`", stringify!($item), "`] from `predictors` and `outcomes`.")]
$($(#[$more_docs])*)?
fn $model(&self, predictors: &[f64], outcomes: &[f64], $($($arg: $ty),*)?) -> $item;
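                    /// Puts this estimator in a box for dynamic dispatch.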
fn $box(self) -> Box<dyn $name>
where
Self: Sized + 'static,
{
Box::new(self)
}
}
impl<T: $name + ?Sized> $name for &T {
fn $model(&self, predictors: &[f64], outcomes: &[f64], $($($arg:$ty),*)?) -> $item {
(**self).$model(predictors, outcomes, $($($arg),*)?)
}
}
)+
};
}
#[derive(Debug, Clone, Copy, PartialEq)]
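    /// The coefficients of a line: `y = k * x + m`.
    ///
    /// ```ignore
    /// // a minimal sketch of direct use
    /// let line = LinearCoefficients { k: 2.0, m: 1.0 };
    /// assert_eq!(line.predict_outcome(3.0), 7.0);
    /// ```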
pub struct LinearCoefficients {
        /// The slope of the line.
        pub k: f64,
        /// The y-intercept of the line.
        pub m: f64,
}
impl Predictive for LinearCoefficients {
fn predict_outcome(&self, predictor: f64) -> f64 {
self.k * predictor + self.m
}
}
impl Display for LinearCoefficients {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let p = f.precision().unwrap_or(5);
write!(f, "{:.2$}x + {:.2$}", self.k, self.m, p)
}
}
#[derive(Clone, Debug)]
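    /// The coefficients of a polynomial, lowest degree first: index `i` holds the
    /// coefficient of `x^i`.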
pub struct PolynomialCoefficients {
pub(crate) coefficients: Vec<f64>,
}
impl Deref for PolynomialCoefficients {
type Target = [f64];
fn deref(&self) -> &Self::Target {
&self.coefficients
}
}
impl Display for PolynomialCoefficients {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut first = true;
for (degree, mut coefficient) in self.coefficients.iter().copied().enumerate().rev() {
if coefficient.abs() < 1e-100 {
continue;
}
if !first {
if coefficient.is_sign_positive() {
write!(f, " + ")?;
} else {
write!(f, " - ")?;
coefficient = -coefficient;
}
}
let p = f.precision().unwrap_or(5);
match degree {
                0 => write!(f, "{coefficient:.0$}", p)?,
                1 => write!(f, "{coefficient:.0$}x", p)?,
                2..=9 => write!(f, "{coefficient:.0$}x^{degree}", p)?,
                _ => write!(f, "{coefficient:.0$}x^{{{degree}}}", p)?,
}
first = false;
}
Ok(())
}
}
impl PolynomialCoefficients {
#[inline(always)]
fn naive_predict(&self, predictor: f64) -> f64 {
match self.coefficients.len() {
0 => 0.,
1 => self.coefficients[0],
2 => self.coefficients[1] * predictor + self.coefficients[0],
3 => {
self.coefficients[2] * predictor * predictor
+ self.coefficients[1] * predictor
+ self.coefficients[0]
}
4 => {
let p2 = predictor * predictor;
self.coefficients[3] * predictor * p2
+ self.coefficients[2] * p2
+ self.coefficients[1] * predictor
+ self.coefficients[0]
}
_ => {
let mut out = 0.0;
let mut pred = 1.;
for coefficient in self.coefficients.iter().copied() {
out += pred * coefficient;
pred *= predictor;
}
out
}
}
}
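        /// Returns the derivative of this polynomial: each coefficient is multiplied
        /// by its degree, then shifted down one degree.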
pub fn derivative(&self) -> Self {
let mut coeffs = Vec::with_capacity(self.len().saturating_sub(1));
for (idx, coeff) in self.coefficients.iter().enumerate().skip(1) {
                coeffs.push(*coeff * idx as f64);
}
Self {
coefficients: coeffs,
}
}
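        /// Returns the antiderivative of this polynomial with the constant of
        /// integration set to zero: each coefficient is divided by its new degree.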
pub fn integral(&self) -> Self {
let mut coeffs = Vec::with_capacity(self.len() + 1);
coeffs.push(0.);
for (idx, coeff) in self.coefficients.iter().enumerate() {
coeffs.push(*coeff / (idx + 1) as f64);
}
Self {
coefficients: coeffs,
}
}
}
impl Predictive for PolynomialCoefficients {
#[cfg(feature = "arbitrary-precision")]
fn predict_outcome(&self, predictor: f64) -> f64 {
if self.coefficients.len() < 10 {
self.naive_predict(predictor)
} else {
use rug::ops::PowAssign;
use rug::Assign;
use std::ops::MulAssign;
let precision = (64 + self.len() * 2) as u32;
let mut out = rug::Float::with_val(precision, 0.0f64);
let original_predictor = predictor;
let mut predictor = rug::Float::with_val(precision, predictor);
for (degree, coefficient) in self.coefficients.iter().copied().enumerate() {
predictor.pow_assign(degree as u32);
predictor.mul_assign(coefficient);
out += &predictor;
predictor.assign(original_predictor)
}
out.to_f64()
}
}
#[cfg(not(feature = "arbitrary-precision"))]
#[inline(always)]
fn predict_outcome(&self, predictor: f64) -> f64 {
self.naive_predict(predictor)
}
}
#[derive(Debug, Clone, PartialEq)]
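    /// The coefficients of a power function:
    /// `y = k * (x + predictor_additive)^e - outcome_additive`.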
pub struct PowerCoefficients {
        /// The multiplier.
        pub k: f64,
        /// The exponent.
        pub e: f64,
        /// The offset added to the predictor before exponentiation.
        pub predictor_additive: f64,
        /// The offset subtracted from the outcome.
        pub outcome_additive: f64,
}
impl Predictive for PowerCoefficients {
fn predict_outcome(&self, predictor: f64) -> f64 {
self.k * (predictor + self.predictor_additive).powf(self.e) - self.outcome_additive
}
}
impl Display for PowerCoefficients {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let p = f.precision().unwrap_or(5);
write!(
f,
"{:.3$} * {x}^{:.3$}{}",
self.k,
self.e,
if self.outcome_additive != 0. {
format!(" - {:.1$}", self.outcome_additive, p)
} else {
String::new()
},
p,
x = if self.predictor_additive != 0. {
format!("(x + {:.1$})", self.predictor_additive, p)
} else {
"x".to_string()
},
)
}
}
impl From<LinearCoefficients> for PolynomialCoefficients {
fn from(coefficients: LinearCoefficients) -> Self {
Self {
coefficients: vec![coefficients.m, coefficients.k],
}
}
}
impl<T: Into<Vec<f64>>> From<T> for PolynomialCoefficients {
fn from(t: T) -> Self {
Self {
coefficients: t.into(),
}
}
}
#[derive(Debug, Clone, PartialEq)]
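    /// The coefficients of an exponential function:
    /// `y = k * b^(x + predictor_additive) - outcome_additive`.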
pub struct ExponentialCoefficients {
        /// The multiplier.
        pub k: f64,
        /// The base.
        pub b: f64,
        /// The offset added to the predictor.
        pub predictor_additive: f64,
        /// The offset subtracted from the outcome.
        pub outcome_additive: f64,
}
impl Predictive for ExponentialCoefficients {
fn predict_outcome(&self, predictor: f64) -> f64 {
self.k * self.b.powf(predictor + self.predictor_additive) - self.outcome_additive
}
}
impl Display for ExponentialCoefficients {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let p = f.precision().unwrap_or(5);
write!(
f,
"{:.3$} * {:.3$}^{x}{}",
self.k,
self.b,
if self.outcome_additive != 0. {
format!(" - {:.1$}", self.outcome_additive, p)
} else {
String::new()
},
p,
x = if self.predictor_additive != 0. {
format!("(x + {:.1$})", self.predictor_additive, p)
} else {
"x".to_string()
},
)
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
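    /// The coefficients of a logistic function: `y = l / (1 + e^(-k * (x - x0)))`.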
pub struct LogisticCoefficients {
        /// The x value of the curve's midpoint.
        pub x0: f64,
        /// The curve's maximum value.
        pub l: f64,
        /// The logistic growth rate (the steepness of the curve).
        pub k: f64,
}
impl Predictive for LogisticCoefficients {
#[inline(always)]
fn predict_outcome(&self, predictor: f64) -> f64 {
self.l / (1. + E.powf(-self.k * (predictor - self.x0)))
}
}
impl Display for LogisticCoefficients {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let p = f.precision().unwrap_or(5);
write!(
f,
"{:.3$} / (1 + e^({}(x {})))",
self.l,
if self.k.is_sign_negative() {
format!("{:.1$}", -self.k, p)
} else {
format!("-{:.1$}", self.k, p)
},
if self.x0.is_sign_negative() {
format!("+ {:.1$}", -self.x0, p)
} else {
format!("- {:.1$}", self.x0, p)
},
p
)
}
}
    estimator!(
        /// An estimator of [`LinearCoefficients`].
        LinearEstimator -> LinearCoefficients, model_linear, boxed_linear
        /// An estimator of [`PolynomialCoefficients`].
        PolynomialEstimator -> PolynomialCoefficients,
        /// Also takes the `degree` of the target polynomial.
        (degree: usize), model_polynomial, boxed_polynomial
        /// An estimator of [`PowerCoefficients`].
        PowerEstimator -> PowerCoefficients, model_power, boxed_power
        /// An estimator of [`ExponentialCoefficients`].
        ExponentialEstimator -> ExponentialCoefficients, model_exponential, boxed_exponential
        /// An estimator of [`LogisticCoefficients`].
        LogisticEstimator -> LogisticCoefficients, model_logistic, boxed_logistic
    );
pub mod trig {
use super::*;
macro_rules! simple_coefficients {
($(
$(#[$docs:meta])+
$name:ident, f64::$fn:ident
)+) => {
simple_coefficients!($($(#[$docs])* $name, v f64::$fn(v), stringify!($fn))+);
};
($(
$(#[$docs:meta])+
$name:ident, 1 / f64::$fn:ident, $disp:expr
)+) => {
simple_coefficients!($($(#[$docs])* $name, v 1.0/f64::$fn(v), $disp)+);
};
($(
$(#[$docs:meta])+
$name:ident, $v:ident $fn:expr, $disp:expr
)+) => {
$(
$(#[$docs])+
#[derive(PartialEq, Clone, Debug)]
pub struct $name {
/// The amplitude of this function.
pub amplitude: f64,
                        /// The frequency of this function.
                        pub frequency: f64,
                        /// The phase (horizontal offset) of this function.
                        pub phase: f64,
}
impl Predictive for $name {
fn predict_outcome(&self, predictor: f64) -> f64 {
let $v = predictor * self.frequency + self.phase;
self.amplitude * $fn
}
}
impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let p = f.precision().unwrap_or(5);
write!(
f,
"{:.4$}{}({:.4$}x+{:.4$})",
self.amplitude,
$disp,
self.frequency,
self.phase,
p,
)
}
}
impl $name {
#[inline(always)]
pub(crate) fn wrap(array: [f64; 3]) -> Self {
Self {
amplitude: array[0],
frequency: array[1],
phase: array[2] % (std::f64::consts::PI * 2.),
}
}
}
)+
};
}
        simple_coefficients!(
            /// `y = a * sin(b * x + c)`
            SineCoefficients, f64::sin
            /// `y = a * cos(b * x + c)`
            CosineCoefficients, f64::cos
            /// `y = a * tan(b * x + c)`
            TangentCoefficients, f64::tan
        );
        simple_coefficients!(
            /// `y = a * sec(b * x + c)`
            SecantCoefficients,
            1 / f64::cos, "sec"
            /// `y = a * csc(b * x + c)`
            CosecantCoefficients,
            1 / f64::sin, "csc"
            /// `y = a * cot(b * x + c)`
            CotangentCoefficients,
            1 / f64::tan, "cot"
        );
        estimator!(
            /// An estimator of [`SineCoefficients`].
            SineEstimator -> SineCoefficients, (max_frequency: f64), model_sine, boxed_sine
            /// An estimator of [`CosineCoefficients`].
            CosineEstimator -> CosineCoefficients, (max_frequency: f64), model_cosine, boxed_cosine
            /// An estimator of [`TangentCoefficients`].
            TangentEstimator -> TangentCoefficients, (max_frequency: f64), model_tangent, boxed_tangent
            /// An estimator of [`SecantCoefficients`].
            SecantEstimator -> SecantCoefficients, (max_frequency: f64), model_secant, boxed_secant
            /// An estimator of [`CosecantCoefficients`].
            CosecantEstimator -> CosecantCoefficients, (max_frequency: f64), model_cosecant, boxed_cosecant
            /// An estimator of [`CotangentCoefficients`].
            CotangentEstimator -> CotangentCoefficients, (max_frequency: f64), model_cotangent, boxed_cotangent
        );
}
}
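/// Tries to find the model which best fits `predictors` and `outcomes`, comparing
/// linear, power, exponential, and (with the `ols` feature, given enough data)
/// second- and third-degree polynomial fits by a heuristically weighted R².
///
/// ```ignore
/// // A minimal sketch, assuming the `ols` feature is enabled and that
/// // `predictors` and `outcomes` are `&[f64]` slices of equal length.
/// let model = best_fit(predictors, outcomes, &OlsEstimator);
/// println!("{model}, R² = {}", model.determination_slice(predictors, outcomes));
/// ```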
pub fn best_fit(
predictors: &[f64],
outcomes: &[f64],
linear_estimator: &impl LinearEstimator,
) -> DynModel {
const LINEAR_BUMP: f64 = 0.0;
const POWER_BUMP: f64 = 1.5;
const EXPONENTIAL_BUMP: f64 = 1.3;
#[allow(unused)]
const SECOND_DEGREE_DISADVANTAGE: f64 = 0.94;
#[allow(unused)]
const THIRD_DEGREE_DISADVANTAGE: f64 = 0.9;
let mut best: Option<(DynModel, f64)> = None;
    macro_rules! update_best {
        ($new: expr, $e: ident, $modifier: expr, $err: expr) => {
            let $e = $err;
            let weighted = $modifier;
            if let Some((_, error)) = &best {
                if weighted > *error {
                    best = Some((DynModel::new($new), weighted))
                }
            } else {
                best = Some((DynModel::new($new), weighted))
            }
        };
        ($new: expr, $e: ident, $modifier: expr) => {
            update_best!(
                $new,
                $e,
                $modifier,
                $new.determination_slice(predictors, outcomes)
            )
        };
        ($new: expr) => {
            update_best!($new, e, e)
        };
    }
let predictor_min = derived::min(predictors).unwrap();
let outcomes_min = derived::min(outcomes).unwrap();
if predictor_min >= 1.0 && outcomes_min >= 1.0 {
let mut mod_predictors = predictors.to_vec();
let mut mod_outcomes = outcomes.to_vec();
let power = derived::power_given_min(
&mut mod_predictors,
&mut mod_outcomes,
predictor_min,
outcomes_min,
linear_estimator,
);
let distance_from_integer = -(0.5 - power.e % 1.0).abs() + 0.5;
let mut power_bump = 1.0;
if distance_from_integer < 0.15 && power.e <= 3.5 && power.e >= -2.5 {
power_bump *= POWER_BUMP;
}
let distance_from_fraction = -(0.5 - power.e.recip() % 1.0).abs() + 0.5;
if distance_from_fraction < 0.1 && power.e.recip() <= 3.5 && power.e.recip() > 0.5 {
power_bump *= POWER_BUMP;
}
let certainty = power.determination_slice(predictors, outcomes);
if certainty > 0.8 {
power_bump *= EXPONENTIAL_BUMP;
}
if certainty > 0.92 {
power_bump *= EXPONENTIAL_BUMP;
}
update_best!(power, e, e * power_bump, certainty);
mod_predictors[..].copy_from_slice(predictors);
mod_outcomes[..].copy_from_slice(outcomes);
let exponential = derived::exponential_given_min(
&mut mod_predictors,
&mut mod_outcomes,
predictor_min,
outcomes_min,
linear_estimator,
);
let certainty = exponential.determination_slice(predictors, outcomes);
let mut exponential_bump = if certainty > 0.8 {
EXPONENTIAL_BUMP
} else {
1.0
};
if certainty > 0.92 {
exponential_bump *= EXPONENTIAL_BUMP;
}
update_best!(exponential, e, e * exponential_bump, certainty);
}
#[cfg(feature = "ols")]
if predictors.len() > 15 {
let degree_2 = ols::polynomial(
predictors.iter().copied(),
outcomes.iter().copied(),
predictors.len(),
2,
);
update_best!(degree_2, e, e * SECOND_DEGREE_DISADVANTAGE);
}
#[cfg(feature = "ols")]
if predictors.len() > 50 {
let degree_3 = ols::polynomial(
predictors.iter().copied(),
outcomes.iter().copied(),
predictors.len(),
3,
);
update_best!(degree_3, e, e * THIRD_DEGREE_DISADVANTAGE);
}
let linear = linear_estimator.model_linear(predictors, outcomes);
update_best!(linear, e, e + LINEAR_BUMP);
best.unwrap().0
}
#[cfg(feature = "ols")]
pub fn best_fit_ols(predictors: &[f64], outcomes: &[f64]) -> DynModel {
best_fit(predictors, outcomes, &OlsEstimator)
}
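/// Regressions derived from a linear fit by transforming the data, e.g. power and
/// exponential models fitted in log₂ space.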
pub mod derived {
use super::*;
pub(super) fn min(slice: &[f64]) -> Option<f64> {
slice
.iter()
.copied()
.map(crate::F64OrdHash)
.min()
.map(|f| f.0)
}
#[cfg(feature = "ols")]
pub fn power_ols(predictors: &mut [f64], outcomes: &mut [f64]) -> PowerCoefficients {
power(predictors, outcomes, &OlsEstimator)
}
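    /// Fits a power function, computing the minima and then deferring to
    /// [`power_given_min`]. Note that `predictors` and `outcomes` are modified
    /// in place (transformed to log₂ space).
    ///
    /// # Panics
    ///
    /// Panics if either slice has fewer than three items.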
pub fn power<E: LinearEstimator>(
predictors: &mut [f64],
outcomes: &mut [f64],
estimator: &E,
) -> PowerCoefficients {
assert!(predictors.len() > 2);
assert!(outcomes.len() > 2);
let predictor_min = min(predictors).unwrap();
let outcome_min = min(outcomes).unwrap();
power_given_min(predictors, outcomes, predictor_min, outcome_min, estimator)
}
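    /// Fits `y = k * x^e` by linearising in log space: `log₂ y = e * log₂ x + log₂ k`
    /// is a line that `estimator` can fit. When either minimum is below 1, an additive
    /// offset is applied first so every value is ≥ 1 before taking logarithms; the
    /// offsets are reported in the returned coefficients.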
pub fn power_given_min<E: LinearEstimator>(
predictors: &mut [f64],
outcomes: &mut [f64],
predictor_min: f64,
outcome_min: f64,
estimator: &E,
) -> PowerCoefficients {
assert_eq!(predictors.len(), outcomes.len());
assert!(predictors.len() > 2);
let predictor_additive = if predictor_min < 1.0 {
Some(1.0 - predictor_min)
} else {
None
};
let outcome_additive = if outcome_min < 1.0 {
Some(1.0 - outcome_min)
} else {
None
};
predictors
.iter_mut()
.for_each(|pred| *pred = (*pred + predictor_additive.unwrap_or(0.0)).log2());
outcomes
.iter_mut()
.for_each(|y| *y = (*y + outcome_additive.unwrap_or(0.0)).log2());
let coefficients = estimator.model_linear(predictors, outcomes);
let k = 2.0_f64.powf(coefficients.m);
let e = coefficients.k;
PowerCoefficients {
k,
e,
predictor_additive: predictor_additive.unwrap_or(0.),
outcome_additive: outcome_additive.unwrap_or(0.),
}
}
#[cfg(feature = "ols")]
pub fn exponential_ols(
predictors: &mut [f64],
outcomes: &mut [f64],
) -> ExponentialCoefficients {
exponential(predictors, outcomes, &OlsEstimator)
}
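    /// Fits an exponential function, computing the minima and then deferring to
    /// [`exponential_given_min`]. `predictors` and `outcomes` are modified in place.
    ///
    /// # Panics
    ///
    /// Panics if either slice has fewer than three items.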
pub fn exponential<E: LinearEstimator>(
predictors: &mut [f64],
outcomes: &mut [f64],
estimator: &E,
) -> ExponentialCoefficients {
assert!(predictors.len() > 2);
assert!(outcomes.len() > 2);
let predictor_min = min(predictors).unwrap();
let outcome_min = min(outcomes).unwrap();
exponential_given_min(predictors, outcomes, predictor_min, outcome_min, estimator)
}
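    /// Fits `y = k * b^x` by linearising: `log₂ y = x * log₂ b + log₂ k` is a line
    /// that `estimator` can fit. When either minimum is below 1, an additive offset
    /// is applied first; the offsets are reported in the returned coefficients.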
pub fn exponential_given_min<E: LinearEstimator>(
predictors: &mut [f64],
outcomes: &mut [f64],
predictor_min: f64,
outcome_min: f64,
estimator: &E,
) -> ExponentialCoefficients {
assert_eq!(predictors.len(), outcomes.len());
assert!(predictors.len() > 2);
let predictor_additive = if predictor_min < 1.0 {
Some(1.0 - predictor_min)
} else {
None
};
let outcome_additive = if outcome_min < 1.0 {
Some(1.0 - outcome_min)
} else {
None
};
if let Some(predictor_additive) = predictor_additive {
predictors
.iter_mut()
.for_each(|pred| *pred += predictor_additive);
}
outcomes
.iter_mut()
.for_each(|y| *y = (*y + outcome_additive.unwrap_or(0.0)).log2());
let coefficients = estimator.model_linear(predictors, outcomes);
let k = 2.0_f64.powf(coefficients.m);
let b = 2.0_f64.powf(coefficients.k);
ExponentialCoefficients {
k,
b,
predictor_additive: predictor_additive.unwrap_or(0.),
outcome_additive: outcome_additive.unwrap_or(0.),
}
}
}
#[cfg(feature = "arbitrary-precision")]
pub mod arbitrary_linear_algebra {
use std::cell::RefCell;
use std::fmt::{self, Display};
use std::ops::{
Add, AddAssign, Deref, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign,
};
use nalgebra::{ComplexField, RealField};
use rug::Assign;
    thread_local! {
        /// The default precision, in bits, for new [`FloatWrapper`]s on this thread.
        pub static DEFAULT_PRECISION: RefCell<u32> = RefCell::new(256);
    }
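    /// Sets the thread-local default precision, in bits, for new [`FloatWrapper`]s.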
pub fn set_default_precision(new: u32) {
DEFAULT_PRECISION.with(|v| *v.borrow_mut() = new);
}
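    /// Returns the thread-local default precision, in bits.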
pub fn default_precision() -> u32 {
DEFAULT_PRECISION.with(|v| *v.borrow())
}
#[derive(Debug, Clone, PartialEq, PartialOrd)]
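    /// A wrapper around [`rug::Float`] that implements the numeric traits required
    /// by `nalgebra`.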
pub struct FloatWrapper(pub rug::Float);
impl From<rug::Float> for FloatWrapper {
fn from(f: rug::Float) -> Self {
Self(f)
}
}
impl simba::scalar::SupersetOf<f64> for FloatWrapper {
fn is_in_subset(&self) -> bool {
self.0.prec() <= 53
}
fn to_subset(&self) -> Option<f64> {
if simba::scalar::SupersetOf::<f64>::is_in_subset(self) {
Some(self.0.to_f64())
} else {
None
}
}
fn to_subset_unchecked(&self) -> f64 {
self.0.to_f64()
}
fn from_subset(element: &f64) -> Self {
rug::Float::with_val(default_precision(), element).into()
}
}
impl simba::scalar::SubsetOf<Self> for FloatWrapper {
fn to_superset(&self) -> Self {
self.clone()
}
fn from_superset_unchecked(element: &Self) -> Self {
element.clone()
}
fn is_in_subset(_element: &Self) -> bool {
true
}
}
impl num_traits::cast::FromPrimitive for FloatWrapper {
fn from_i64(n: i64) -> Option<Self> {
Some(rug::Float::with_val(default_precision(), n).into())
}
fn from_u64(n: u64) -> Option<Self> {
Some(rug::Float::with_val(default_precision(), n).into())
}
}
impl Display for FloatWrapper {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl simba::simd::SimdValue for FloatWrapper {
type Element = FloatWrapper;
type SimdBool = bool;
#[inline(always)]
fn lanes() -> usize {
1
}
#[inline(always)]
fn splat(val: Self::Element) -> Self {
val
}
#[inline(always)]
fn extract(&self, _: usize) -> Self::Element {
self.clone()
}
#[inline(always)]
unsafe fn extract_unchecked(&self, _: usize) -> Self::Element {
self.clone()
}
#[inline(always)]
fn replace(&mut self, _: usize, val: Self::Element) {
*self = val
}
#[inline(always)]
unsafe fn replace_unchecked(&mut self, _: usize, val: Self::Element) {
*self = val
}
#[inline(always)]
fn select(self, cond: Self::SimdBool, other: Self) -> Self {
if cond {
self
} else {
other
}
}
}
impl Neg for FloatWrapper {
type Output = Self;
fn neg(self) -> Self::Output {
Self(-self.0)
}
}
impl Add for FloatWrapper {
type Output = Self;
fn add(mut self, rhs: Self) -> Self::Output {
self.0 += rhs.0;
self
}
}
impl Sub for FloatWrapper {
type Output = Self;
fn sub(mut self, rhs: Self) -> Self::Output {
self.0 -= rhs.0;
self
}
}
impl Mul for FloatWrapper {
type Output = Self;
fn mul(mut self, rhs: Self) -> Self::Output {
self.0 *= rhs.0;
self
}
}
impl Div for FloatWrapper {
type Output = Self;
fn div(mut self, rhs: Self) -> Self::Output {
self.0 /= rhs.0;
self
}
}
impl Rem for FloatWrapper {
type Output = Self;
fn rem(mut self, rhs: Self) -> Self::Output {
self.0 %= rhs.0;
self
}
}
impl AddAssign for FloatWrapper {
fn add_assign(&mut self, rhs: Self) {
self.0 += rhs.0;
}
}
impl SubAssign for FloatWrapper {
fn sub_assign(&mut self, rhs: Self) {
self.0 -= rhs.0;
}
}
impl MulAssign for FloatWrapper {
fn mul_assign(&mut self, rhs: Self) {
self.0 *= rhs.0;
}
}
impl DivAssign for FloatWrapper {
fn div_assign(&mut self, rhs: Self) {
self.0 /= rhs.0;
}
}
impl RemAssign for FloatWrapper {
fn rem_assign(&mut self, rhs: Self) {
self.0 %= rhs.0;
}
}
impl num_traits::Zero for FloatWrapper {
fn zero() -> Self {
Self(rug::Float::with_val(default_precision(), 0.0))
}
fn is_zero(&self) -> bool {
self.0 == 0.0
}
}
impl num_traits::One for FloatWrapper {
fn one() -> Self {
Self(rug::Float::with_val(default_precision(), 1.0))
}
}
impl num_traits::Num for FloatWrapper {
type FromStrRadixErr = rug::float::ParseFloatError;
fn from_str_radix(s: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
rug::Float::parse_radix(s, radix as i32)
.map(|f| Self(rug::Float::with_val(default_precision(), f)))
}
}
impl num_traits::Signed for FloatWrapper {
fn abs(&self) -> Self {
(*self.0.as_abs()).clone().into()
}
fn abs_sub(&self, other: &Self) -> Self {
if self.0 <= other.0 {
rug::Float::with_val(self.prec(), 0.0f64).into()
} else {
Self(self.0.clone() - &other.0)
}
}
fn signum(&self) -> Self {
self.0.clone().signum().into()
}
fn is_positive(&self) -> bool {
self.0.is_sign_positive()
}
fn is_negative(&self) -> bool {
self.0.is_sign_negative()
}
}
impl approx::AbsDiffEq for FloatWrapper {
type Epsilon = Self;
fn default_epsilon() -> Self::Epsilon {
rug::Float::with_val(default_precision(), f64::EPSILON).into()
}
fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool {
if self.0 == other.0 {
return true;
}
if self.0.is_infinite() || other.0.is_infinite() {
return false;
}
let mut buffer = self.clone();
buffer.0.assign(&self.0 - &other.0);
buffer.0.abs_mut();
let abs_diff = buffer;
abs_diff.0 <= epsilon.0
}
}
impl approx::RelativeEq for FloatWrapper {
fn default_max_relative() -> Self::Epsilon {
rug::Float::with_val(default_precision(), f64::EPSILON).into()
}
fn relative_eq(
&self,
other: &Self,
epsilon: Self::Epsilon,
max_relative: Self::Epsilon,
) -> bool {
if self.0 == other.0 {
return true;
}
if self.0.is_infinite() || other.0.is_infinite() {
return false;
}
let mut buffer = self.clone();
buffer.0.assign(&self.0 - &other.0);
buffer.0.abs_mut();
let abs_diff = buffer;
if abs_diff.0 <= epsilon.0 {
return true;
}
let abs_self = self.0.as_abs();
let abs_other = other.0.as_abs();
let largest = if *abs_other > *abs_self {
&*abs_other
} else {
&*abs_self
};
abs_diff.0 <= largest * max_relative.0
}
}
impl approx::UlpsEq for FloatWrapper {
fn default_max_ulps() -> u32 {
4
}
fn ulps_eq(&self, other: &Self, epsilon: Self::Epsilon, _max_ulps: u32) -> bool {
approx::AbsDiffEq::abs_diff_eq(&self, &other, epsilon)
}
}
impl nalgebra::Field for FloatWrapper {}
impl RealField for FloatWrapper {
fn is_sign_positive(&self) -> bool {
todo!()
}
fn is_sign_negative(&self) -> bool {
todo!()
}
fn copysign(self, _sign: Self) -> Self {
todo!()
}
fn max(self, _other: Self) -> Self {
todo!()
}
fn min(self, _other: Self) -> Self {
todo!()
}
fn clamp(self, _min: Self, _max: Self) -> Self {
todo!()
}
fn atan2(self, _other: Self) -> Self {
todo!()
}
fn min_value() -> Option<Self> {
todo!()
}
fn max_value() -> Option<Self> {
todo!()
}
fn pi() -> Self {
todo!()
}
fn two_pi() -> Self {
todo!()
}
fn frac_pi_2() -> Self {
todo!()
}
fn frac_pi_3() -> Self {
todo!()
}
fn frac_pi_4() -> Self {
todo!()
}
fn frac_pi_6() -> Self {
todo!()
}
fn frac_pi_8() -> Self {
todo!()
}
fn frac_1_pi() -> Self {
todo!()
}
fn frac_2_pi() -> Self {
todo!()
}
fn frac_2_sqrt_pi() -> Self {
todo!()
}
fn e() -> Self {
todo!()
}
fn log2_e() -> Self {
todo!()
}
fn log10_e() -> Self {
todo!()
}
fn ln_2() -> Self {
todo!()
}
fn ln_10() -> Self {
todo!()
}
}
impl ComplexField for FloatWrapper {
type RealField = Self;
fn from_real(re: Self::RealField) -> Self {
re
}
fn real(self) -> Self::RealField {
self
}
fn imaginary(mut self) -> Self::RealField {
self.0.assign(0.0);
self
}
fn modulus(self) -> Self::RealField {
self.abs()
}
fn modulus_squared(self) -> Self::RealField {
self.0.square().into()
}
fn argument(mut self) -> Self::RealField {
if self.0.is_sign_positive() || self.0.is_zero() {
self.0.assign(0.0);
self
} else {
Self::pi()
}
}
fn norm1(self) -> Self::RealField {
self.abs()
}
fn scale(self, factor: Self::RealField) -> Self {
self.0.mul(factor.0).into()
}
fn unscale(self, factor: Self::RealField) -> Self {
self.0.div(factor.0).into()
}
fn floor(self) -> Self {
todo!()
}
fn ceil(self) -> Self {
todo!()
}
fn round(self) -> Self {
todo!()
}
fn trunc(self) -> Self {
todo!()
}
fn fract(self) -> Self {
todo!()
}
fn mul_add(self, _a: Self, _b: Self) -> Self {
todo!()
}
fn abs(self) -> Self::RealField {
self.0.abs().into()
}
fn hypot(self, other: Self) -> Self::RealField {
self.0.hypot(&other.0).into()
}
fn recip(self) -> Self {
todo!()
}
fn conjugate(self) -> Self {
self
}
fn sin(self) -> Self {
todo!()
}
fn cos(self) -> Self {
todo!()
}
fn sin_cos(self) -> (Self, Self) {
todo!()
}
fn tan(self) -> Self {
todo!()
}
fn asin(self) -> Self {
todo!()
}
fn acos(self) -> Self {
todo!()
}
fn atan(self) -> Self {
todo!()
}
fn sinh(self) -> Self {
todo!()
}
fn cosh(self) -> Self {
todo!()
}
fn tanh(self) -> Self {
todo!()
}
fn asinh(self) -> Self {
todo!()
}
fn acosh(self) -> Self {
todo!()
}
fn atanh(self) -> Self {
todo!()
}
fn log(self, _base: Self::RealField) -> Self {
todo!()
}
fn log2(self) -> Self {
todo!()
}
fn log10(self) -> Self {
todo!()
}
fn ln(self) -> Self {
todo!()
}
fn ln_1p(self) -> Self {
todo!()
}
fn sqrt(self) -> Self {
self.0.sqrt().into()
}
fn exp(self) -> Self {
todo!()
}
fn exp2(self) -> Self {
todo!()
}
fn exp_m1(self) -> Self {
todo!()
}
fn powi(self, _n: i32) -> Self {
todo!()
}
fn powf(self, _n: Self::RealField) -> Self {
todo!()
}
fn powc(self, _n: Self) -> Self {
todo!()
}
fn cbrt(self) -> Self {
todo!()
}
fn try_sqrt(self) -> Option<Self> {
todo!()
}
fn is_finite(&self) -> bool {
self.0.is_finite()
}
}
impl Deref for FloatWrapper {
type Target = rug::Float;
fn deref(&self) -> &Self::Target {
&self.0
}
}
}
#[cfg(feature = "ols")]
pub mod ols {
use std::cell::RefCell;
use nalgebra::DMatrix;
use super::*;
#[must_use]
struct RuntimeMatrices {
design: DMatrix<f64>,
transposed: DMatrix<f64>,
outcomes: DMatrix<f64>,
intermediary1: DMatrix<f64>,
intermediary2: DMatrix<f64>,
result: DMatrix<f64>,
len: usize,
degree: usize,
}
impl RuntimeMatrices {
fn new() -> Self {
Self {
design: DMatrix::zeros(0, 0),
transposed: DMatrix::zeros(0, 0),
outcomes: DMatrix::zeros(0, 0),
intermediary1: DMatrix::zeros(0, 0),
intermediary2: DMatrix::zeros(0, 0),
result: DMatrix::zeros(0, 0),
len: 0,
degree: 0,
}
}
#[inline]
fn resize(&mut self, len: usize, degree: usize) {
if self.len == len && self.degree == degree {
return;
}
let rows = len;
let columns = degree + 1;
self.design.resize_mut(rows, columns, 0.);
self.transposed.resize_mut(columns, rows, 0.);
self.outcomes.resize_mut(rows, 1, 0.);
self.intermediary1.resize_mut(columns, columns, 0.);
self.intermediary2.resize_mut(rows, columns, 0.);
self.result.resize_mut(columns, 1, 0.);
self.len = len;
self.degree = degree;
}
}
thread_local! {static RUNTIME: RefCell<RuntimeMatrices> = RefCell::new(RuntimeMatrices::new());}
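    /// An estimator implementing [`LinearEstimator`] and [`PolynomialEstimator`]
    /// using ordinary least squares.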
pub struct OlsEstimator;
impl LinearEstimator for OlsEstimator {
fn model_linear(&self, predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
let coefficients = polynomial(
predictors.iter().copied(),
outcomes.iter().copied(),
predictors.len(),
1,
);
LinearCoefficients {
k: coefficients[1],
m: coefficients[0],
}
}
}
impl PolynomialEstimator for OlsEstimator {
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
assert_eq!(predictors.len(), outcomes.len());
polynomial(
predictors.iter().copied(),
outcomes.iter().copied(),
predictors.len(),
degree,
)
}
}
#[inline(always)]
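    /// Fits a polynomial of `degree` to the `len` paired items of `predictors` and
    /// `outcomes` using ordinary least squares.
    ///
    /// With the `arbitrary-precision` feature enabled, degrees of 10 and above are
    /// computed with arbitrary-precision floats, as the normal equations lose too
    /// much `f64` precision there.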
pub fn polynomial(
predictors: impl Iterator<Item = f64> + Clone,
outcomes: impl Iterator<Item = f64>,
len: usize,
degree: usize,
) -> PolynomialCoefficients {
#[allow(unused)]
fn polynomial_simple(
predictors: impl Iterator<Item = f64> + Clone,
outcomes: impl Iterator<Item = f64>,
len: usize,
degree: usize,
) -> PolynomialCoefficients {
let predictor_original = predictors.clone();
let mut predictor_iter = predictors;
let design =
nalgebra::DMatrix::from_fn(len, degree + 1, |row: usize, column: usize| {
if column == 0 {
1.0
} else if column == 1 {
predictor_iter.next().unwrap()
} else {
if row == 0 {
predictor_iter = predictor_original.clone();
}
predictor_iter.next().unwrap().powi(column as _)
}
});
let t = design.transpose();
let outcomes = nalgebra::DMatrix::from_iterator(len, 1, outcomes);
let result = ((&t * &design)
.try_inverse()
                .unwrap_or_else(|| (&t * &design).pseudo_inverse(1e-6).unwrap())
* &t)
* outcomes;
PolynomialCoefficients {
coefficients: result.iter().copied().collect(),
}
}
fn polynomial_simple_preallocated(
predictors: impl Iterator<Item = f64> + Clone,
outcomes: impl Iterator<Item = f64>,
len: usize,
degree: usize,
) -> PolynomialCoefficients {
RUNTIME.with(move |runtime| {
let mut runtime = runtime.borrow_mut();
let predictor_original = predictors.clone();
let mut predictor_iter = predictors;
runtime.resize(len, degree);
let RuntimeMatrices {
design,
transposed,
outcomes: outcomes_matrix,
intermediary1,
intermediary2,
result,
..
} = &mut *runtime;
{
let (rows, columns) = design.shape();
for column in 0..columns {
for row in 0..rows {
let v = unsafe { design.get_unchecked_mut((row, column)) };
*v = if column == 0 {
1.0
} else if column == 1 {
predictor_iter.next().unwrap()
} else {
if row == 0 {
predictor_iter = predictor_original.clone();
}
predictor_iter.next().unwrap().powi(column as _)
};
}
}
}
design.transpose_to(transposed);
{
let rows = outcomes_matrix.nrows();
for (row, outcome) in (0..rows).zip(outcomes) {
let v = unsafe { outcomes_matrix.get_unchecked_mut((row, 0)) };
*v = outcome;
}
}
transposed.mul_to(design, intermediary1);
if !intermediary1.try_inverse_mut() {
let im = std::mem::replace(intermediary1, DMatrix::zeros(0, 0));
let pseudo_inverse = im.pseudo_inverse(1e-8).unwrap();
*intermediary1 = pseudo_inverse;
}
*intermediary2 = &*intermediary1 * &*transposed;
intermediary2.mul_to(outcomes_matrix, result);
PolynomialCoefficients {
coefficients: runtime.result.iter().copied().collect(),
}
})
}
#[cfg(feature = "arbitrary-precision")]
fn polynomial_arbitrary(
predictors: impl Iterator<Item = f64> + Clone,
outcomes: impl Iterator<Item = f64>,
len: usize,
degree: usize,
) -> PolynomialCoefficients {
use rug::ops::PowAssign;
let precision = (64 + degree * 2) as u32;
let old = arbitrary_linear_algebra::default_precision();
arbitrary_linear_algebra::set_default_precision(precision);
let predictors = predictors.map(|x| {
arbitrary_linear_algebra::FloatWrapper::from(rug::Float::with_val(precision, x))
});
let outcomes = outcomes.map(|y| {
arbitrary_linear_algebra::FloatWrapper::from(rug::Float::with_val(precision, y))
});
let predictor_original = predictors.clone();
let mut predictor_iter = predictors;
let design =
nalgebra::DMatrix::from_fn(len, degree + 1, |row: usize, column: usize| {
if column == 0 {
rug::Float::with_val(precision, 1.0_f64).into()
} else if column == 1 {
predictor_iter.next().unwrap()
} else {
if row == 0 {
predictor_iter = predictor_original.clone();
}
let mut f = predictor_iter.next().unwrap();
f.0.pow_assign(column as u32);
f
}
});
let t = design.transpose();
let outcomes = nalgebra::DMatrix::from_iterator(len, 1, outcomes);
let result = ((&t * &design).try_inverse().unwrap() * &t) * outcomes;
arbitrary_linear_algebra::set_default_precision(old);
PolynomialCoefficients {
coefficients: result.iter().map(|f| f.0.to_f64()).collect(),
}
}
debug_assert!(degree < len, "degree + 1 must be less than or equal to len");
#[cfg(feature = "arbitrary-precision")]
if degree < 10 {
polynomial_simple_preallocated(predictors, outcomes, len, degree)
} else {
polynomial_arbitrary(predictors, outcomes, len, degree)
}
#[cfg(not(feature = "arbitrary-precision"))]
polynomial_simple_preallocated(predictors, outcomes, len, degree)
}
}
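/// The Theil-Sen estimator: fits a model through every combination of points and
/// takes the median of the resulting coefficients, which makes it robust to outliers.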
pub mod theil_sen {
use super::*;
use crate::{percentile, F64OrdHash};
use std::fmt::Debug;
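    /// A buffer of coordinate pairs yielded by [`PermutationIter`]; hand it back
    /// with [`PermutationIter::give_buffer`] to avoid reallocating.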
pub struct PermutationIterBuffer<T> {
buf: Vec<(T, T)>,
}
impl<T> Deref for PermutationIterBuffer<T> {
type Target = [(T, T)];
fn deref(&self) -> &Self::Target {
&self.buf
}
}
#[derive(Debug)]
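    /// An iterator over all combinations of `pairs` coordinate pairs drawn from
    /// two equally long slices.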
pub struct PermutationIter<'a, T> {
s1: &'a [T],
s2: &'a [T],
iters: Vec<usize>,
values: Option<Vec<(T, T)>>,
values_backup: Vec<(T, T)>,
pairs: usize,
}
impl<'a, T: Copy + Debug> PermutationIter<'a, T> {
fn new(s1: &'a [T], s2: &'a [T], pairs: usize) -> Self {
            assert!(
                pairs > 1,
                "at least two coordinate pairs are required per permutation"
            );
assert_eq!(s1.len(), s2.len());
assert!(pairs <= s1.len());
let iters = Vec::with_capacity(pairs);
let values_backup = Vec::with_capacity(pairs);
let mut me = Self {
s1,
s2,
iters,
values: None,
values_backup,
pairs,
};
for i in 0..pairs {
me.iters.push(i + usize::from(i + 1 < pairs) - 1);
}
            #[allow(clippy::needless_range_loop)]
            for i in 0..pairs - 1 {
me.values_backup.push((me.s1[i], me.s2[i]));
}
me.values_backup.push(me.values_backup[0]);
me.values = Some(me.values_backup.clone());
me
}
#[inline(always)]
pub fn give_buffer(&mut self, buf: PermutationIterBuffer<T>) {
debug_assert!(self.values.is_none());
self.values = Some(buf.buf)
}
pub fn collect_by_index(mut self) -> Vec<Vec<(T, T)>> {
let mut vecs = Vec::with_capacity(self.pairs);
for _ in 0..self.pairs {
vecs.push(Vec::new());
}
while let Some(buf) = self.next() {
for (pos, pair) in buf.iter().enumerate() {
vecs[pos].push(*pair)
}
self.give_buffer(buf);
}
vecs
}
pub fn collect_len<const LEN: usize>(mut self) -> Vec<[(T, T); LEN]> {
let mut vec = Vec::new();
while let Some(buf) = self.next() {
let array = <[(T, T); LEN]>::try_from(&*buf).expect(
"tried to collect with set len, but permutations returned different len",
);
vec.push(array);
self.give_buffer(buf);
}
vec
}
}
impl<'a, T: Copy + Debug> Iterator for PermutationIter<'a, T> {
type Item = PermutationIterBuffer<T>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
for (num, iter) in self.iters.iter_mut().enumerate().rev() {
*iter += 1;
if let Some(value) = self.s1.get(*iter) {
let next = (*value, *unsafe { self.s2.get_unchecked(*iter) });
let values = &mut self.values_backup;
if let Some(v) = self.values.as_mut() {
*unsafe { v.get_unchecked_mut(num) } = next;
}
*unsafe { values.get_unchecked_mut(num) } = next;
if num + 1 == self.pairs {
let values = match self.values.take() {
Some(x) => x,
None => self.values_backup.clone(),
};
return Some(PermutationIterBuffer { buf: values });
} else {
if self.s1.len() - *iter <= self.pairs - 1 - num {
continue;
}
                        #[allow(clippy::needless_range_loop)]
                        for i in num + 1..self.pairs {
let new = self.iters[i - 1] + usize::from(i + 1 < self.pairs);
self.iters[i] = new;
if i + 1 < self.pairs {
if new >= self.s1.len() {
continue;
}
values[i] = (self.s1[new], self.s2[new]);
if let Some(v) = self.values.as_mut() {
v[i] = (self.s1[new], self.s2[new]);
}
}
}
return self.next();
}
}
}
None
}
}
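    /// Returns an iterator over all combinations of `pairs` coordinate pairs from
    /// `s1` (predictors) and `s2` (outcomes).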
pub fn permutations_generic<'a, T: Copy + Debug>(
s1: &'a [T],
s2: &'a [T],
pairs: usize,
) -> PermutationIter<'a, T> {
PermutationIter::new(s1, s2, pairs)
}
#[inline]
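    /// A fast approximation of [`permutation_count`], avoiding integer overflow
    /// for large inputs.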
pub fn estimate_permutation_count(elements: usize, pairs: usize) -> f64 {
let e = elements as f64;
let p = pairs as f64;
e.powf(p) / (p.powf(p - 0.8))
}
#[inline]
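    /// The exact number of combinations of `pairs` items from `elements`:
    /// `elements! / (pairs! * (elements - pairs)!)`. Returns `None` on overflow.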
pub fn permutation_count(elements: usize, pairs: usize) -> Option<usize> {
fn factorial(num: u128) -> Option<u128> {
match num {
0 | 1 => Some(1),
_ => factorial(num - 1)?.checked_mul(num),
}
}
Some(
(factorial(elements as _)?
/ (factorial(pairs as _)?.checked_mul(factorial((elements - pairs) as _)?))?)
as usize,
)
}
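    /// Iterates over all unordered pairs of points, the two-pair special case of
    /// [`permutations_generic`].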
pub fn permutations<'a, T: Copy>(
s1: &'a [T],
s2: &'a [T],
) -> impl Iterator<Item = ((T, T), (T, T))> + 'a {
s1.iter()
.zip(s2.iter())
.enumerate()
.flat_map(|(pos, (t11, t21))| {
let left = &s1[pos + 1..];
let left_other = &s2[pos + 1..];
left.iter()
.zip(left_other.iter())
.map(|(t12, t22)| ((*t11, *t21), (*t12, *t22)))
})
}
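    /// A [`LinearEstimator`] using the Theil-Sen method.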
pub struct LinearTheilSen;
impl LinearEstimator for LinearTheilSen {
#[inline]
fn model_linear(&self, predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
slow_linear(predictors, outcomes)
}
}
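    /// A [`PolynomialEstimator`] using the Theil-Sen method.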
pub struct PolynomialTheilSen;
impl PolynomialEstimator for PolynomialTheilSen {
#[inline]
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
slow_polynomial(predictors, outcomes, degree)
}
}
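    /// Naive Theil-Sen linear fit, O(n²): the slope is the median slope over all
    /// point pairs, and the intercept is derived from the median point.
    ///
    /// ```ignore
    /// // a minimal sketch: a perfect line y = 2x recovers k = 2, m = 0
    /// let coeffs = slow_linear(&[1., 2., 3.], &[2., 4., 6.]);
    /// assert!((coeffs.k - 2.).abs() < 1e-9 && coeffs.m.abs() < 1e-9);
    /// ```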
pub fn slow_linear(predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
assert_eq!(predictors.len(), outcomes.len());
let median_slope = {
let slopes = permutations(predictors, outcomes).map(|((x1, y1), (x2, y2))| {
(y1 - y2) / (x1 - x2)
});
let mut slopes: Vec<_> = slopes.map(F64OrdHash).collect();
percentile::median(&mut slopes).resolve()
};
let median = {
#[derive(Debug, Clone, Copy)]
struct CmpFirst<T, V>(T, V);
impl<T: PartialEq, V> PartialEq for CmpFirst<T, V> {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.0.eq(&other.0)
}
}
impl<T: PartialEq + Eq, V> Eq for CmpFirst<T, V> {}
impl<T: PartialOrd, V> PartialOrd for CmpFirst<T, V> {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.0.partial_cmp(&other.0)
}
}
impl<T: Ord, V> Ord for CmpFirst<T, V> {
#[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.0.cmp(&other.0)
}
}
let mut values: Vec<_> = predictors.iter().zip(outcomes.iter()).collect();
match percentile::percentile_default_pivot_by(
&mut values,
crate::Fraction::HALF,
&mut |a, b| F64OrdHash::f64_cmp(*a.1, *b.1),
) {
percentile::MeanValue::Single(v) => (*v.0, *v.1),
percentile::MeanValue::Mean(v1, v2) => ((v1.0 + v2.0) / 2.0, (v1.1 + v2.1) / 2.0),
}
};
let intersect = median.1 - median.0 * median_slope;
LinearCoefficients {
k: median_slope,
m: intersect,
}
}
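    /// Naive polynomial Theil-Sen: fits an exact polynomial through every
    /// combination of `degree + 1` points and takes the median of each coefficient.
    /// Degrees above 2 require the `ols` feature.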
pub fn slow_polynomial(
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
assert_eq!(predictors.len(), outcomes.len());
if degree == 0 {
let mut outcomes = outcomes.to_vec();
let constant = crate::percentile::percentile_default_pivot_by(
&mut outcomes,
crate::Fraction::HALF,
&mut |a, b| crate::F64OrdHash::f64_cmp(*a, *b),
)
.resolve();
return PolynomialCoefficients {
coefficients: vec![constant],
};
}
let mut iter = permutations_generic(predictors, outcomes, degree + 1);
let mut coefficients = Vec::with_capacity(degree + 1);
let permutations_count = permutation_count(predictors.len(), degree + 1)
.unwrap_or_else(|| estimate_permutation_count(predictors.len(), degree + 1) as usize);
for _ in 0..degree + 1 {
coefficients.push(Vec::with_capacity(permutations_count))
}
match degree {
0 => unreachable!("we handled this above"),
1 => {
while let Some(buf) = iter.next() {
debug_assert_eq!(buf.len(), 2);
let p1 = unsafe { buf.get_unchecked(0) };
let x1 = p1.0;
let y1 = p1.1;
let p2 = unsafe { buf.get_unchecked(1) };
let x2 = p2.0;
let y2 = p2.1;
let slope = (y1 - y2) / (x1 - x2);
let intersect = y1 - x1 * slope;
unsafe {
coefficients.get_unchecked_mut(1).push(slope);
coefficients.get_unchecked_mut(0).push(intersect);
}
iter.give_buffer(buf);
}
}
2 => {
while let Some(buf) = iter.next() {
debug_assert_eq!(buf.len(), 3);
let p1 = unsafe { buf.get_unchecked(0) };
let x1 = p1.0;
let y1 = p1.1;
let p2 = unsafe { buf.get_unchecked(1) };
let x2 = p2.0;
let y2 = p2.1;
let p3 = unsafe { buf.get_unchecked(2) };
let x3 = p3.0;
let y3 = p3.1;
let a = (x1 * (y3 - y2) + x2 * (y1 - y3) + x3 * (y2 - y1))
/ ((x1 - x2) * (x1 - x3) * (x2 - x3));
let b = (y2 - y1) / (x2 - x1) - a * (x1 + x2);
let c = y1 - a * x1 * x1 - b * x1;
unsafe {
coefficients.get_unchecked_mut(2).push(a);
coefficients.get_unchecked_mut(1).push(b);
coefficients.get_unchecked_mut(0).push(c);
}
iter.give_buffer(buf);
}
}
#[cfg(not(feature = "ols"))]
_ => {
panic!("unsupported degree for polynomial Theil-Sen. Supports 1,2 without the OLS cargo feature.");
}
#[cfg(feature = "ols")]
_ => {
while let Some(buf) = iter.next() {
#[inline(always)]
fn tuple_first(t: &(f64, f64)) -> f64 {
t.0
}
#[inline(always)]
fn tuple_second(t: &(f64, f64)) -> f64 {
t.1
}
debug_assert_eq!(buf.len(), degree + 1);
let predictors = buf.iter().map(tuple_first);
let outcomes = buf.iter().map(tuple_second);
let polynomial = ols::polynomial(predictors, outcomes, degree + 1, degree);
for (pos, coefficient) in polynomial.iter().enumerate() {
unsafe { coefficients.get_unchecked_mut(pos).push(*coefficient) };
}
iter.give_buffer(buf);
}
}
}
#[inline(always)]
fn f64_cmp(a: &f64, b: &f64) -> std::cmp::Ordering {
crate::F64OrdHash::f64_cmp(*a, *b)
}
let mut result = Vec::with_capacity(degree + 1);
for mut coefficients in coefficients {
let median = crate::percentile::percentile_default_pivot_by(
&mut coefficients,
crate::Fraction::HALF,
&mut f64_cmp,
)
.resolve();
result.push(median);
}
PolynomialCoefficients {
coefficients: result,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn permutations_eq_1() {
let s1 = [1., 2., 3., 4., 5.];
let s2 = [2., 4., 6., 8., 10.];
let permutations1 = permutations(&s1, &s2)
.map(|(x, y)| [x, y])
.collect::<Vec<_>>();
let permutations2 = permutations_generic(&s1, &s2, 2).collect_len();
assert_eq!(permutations1, permutations2);
}
#[test]
#[cfg(feature = "rand")]
fn permutations_eq_2() {
use rand::Rng;
let mut s1 = [0.0; 20];
let mut s2 = [0.0; 20];
let mut rng = rand::thread_rng();
rng.fill(&mut s1);
rng.fill(&mut s2);
let permutations1 = permutations(&s1, &s2)
.map(|(x, y)| [x, y])
.collect::<Vec<_>>();
let permutations2 = permutations_generic(&s1, &s2, 2).collect_len();
assert_eq!(permutations1, permutations2);
}
#[test]
fn permutations_len_3() {
let s1 = [1., 2., 3., 4., 5.];
let s2 = [2., 4., 6., 8., 10.];
let permutations = permutations_generic(&s1, &s2, 3).collect_len::<3>();
let expected: &[[(f64, f64); 3]] = &[
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)],
[(1.0, 2.0), (2.0, 4.0), (4.0, 8.0)],
[(1.0, 2.0), (2.0, 4.0), (5.0, 10.0)],
[(1.0, 2.0), (3.0, 6.0), (4.0, 8.0)],
[(1.0, 2.0), (3.0, 6.0), (5.0, 10.0)],
[(1.0, 2.0), (4.0, 8.0), (5.0, 10.0)],
[(2.0, 4.0), (3.0, 6.0), (4.0, 8.0)],
[(2.0, 4.0), (3.0, 6.0), (5.0, 10.0)],
[(2.0, 4.0), (4.0, 8.0), (5.0, 10.0)],
[(3.0, 6.0), (4.0, 8.0), (5.0, 10.0)],
];
assert_eq!(expected.len(), permutation_count(5, 3).unwrap());
            assert_eq!(permutations, expected);
}
#[test]
fn permutations_len_4_1() {
let s1 = [1., 2., 3., 4., 5.];
let s2 = [2., 4., 6., 8., 10.];
let permutations = permutations_generic(&s1, &s2, 4).collect_len();
let expected: &[[(f64, f64); 4]] = &[
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (4.0, 8.0)],
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (5.0, 10.0)],
[(1.0, 2.0), (2.0, 4.0), (4.0, 8.0), (5.0, 10.0)],
[(1.0, 2.0), (3.0, 6.0), (4.0, 8.0), (5.0, 10.0)],
[(2.0, 4.0), (3.0, 6.0), (4.0, 8.0), (5.0, 10.0)],
];
assert_eq!(expected.len(), permutation_count(5, 4).unwrap());
            assert_eq!(permutations, expected);
}
#[test]
fn permutations_len_4_2() {
let s1 = [1., 2., 3., 4., 5., 6.];
let s2 = [2., 4., 6., 8., 10., 12.];
let permutations = permutations_generic(&s1, &s2, 4).collect_len();
let expected: &[[(f64, f64); 4]] = &[
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (4.0, 8.0)],
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (5.0, 10.0)],
[(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (6.0, 12.0)],
[(1.0, 2.0), (2.0, 4.0), (4.0, 8.0), (5.0, 10.0)],
[(1.0, 2.0), (2.0, 4.0), (4.0, 8.0), (6.0, 12.0)],
[(1.0, 2.0), (2.0, 4.0), (5.0, 10.0), (6.0, 12.0)],
[(1.0, 2.0), (3.0, 6.0), (4.0, 8.0), (5.0, 10.0)],
[(1.0, 2.0), (3.0, 6.0), (4.0, 8.0), (6.0, 12.0)],
[(1.0, 2.0), (3.0, 6.0), (5.0, 10.0), (6.0, 12.0)],
[(1.0, 2.0), (4.0, 8.0), (5.0, 10.0), (6.0, 12.0)],
[(2.0, 4.0), (3.0, 6.0), (4.0, 8.0), (5.0, 10.0)],
[(2.0, 4.0), (3.0, 6.0), (4.0, 8.0), (6.0, 12.0)],
[(2.0, 4.0), (3.0, 6.0), (5.0, 10.0), (6.0, 12.0)],
[(2.0, 4.0), (4.0, 8.0), (5.0, 10.0), (6.0, 12.0)],
[(3.0, 6.0), (4.0, 8.0), (5.0, 10.0), (6.0, 12.0)],
];
assert_eq!(expected.len(), permutation_count(6, 4).unwrap());
            assert_eq!(permutations, expected);
}
}
}
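/// Estimators that search coefficient space along a logarithmic spiral (two
/// variables) or a spherical spiral (three variables), repeatedly re-centring
/// on the best candidate found so far.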
pub mod spiral {
use super::*;
use std::f64::consts::{E, TAU};
use std::ops::Range;
use utils::*;
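    /// Maximises `fitness_function` over two variables by sampling candidates along
    /// the logarithmic spiral `r = e^(θ * angle_coefficient)`, scaled per variable
    /// and centred on the best candidate; after each of the `num_lockon` passes the
    /// per-variable scale adapts toward where the best candidate was found.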
pub fn two_variable_optimization(
fitness_function: impl Fn([f64; 2]) -> f64,
options: Options,
) -> [f64; 2] {
let Options {
exponent_coefficient,
angle_coefficient,
num_lockon,
samples_per_rotation,
range,
turns: _,
} = options;
let advance = TAU / samples_per_rotation;
let mut best = ((f64::MIN, [1.; 2], 1.), [0.; 2]);
let mut last_best = f64::MIN;
let mut exponent_coefficients = [exponent_coefficient; 2];
for i in 0..num_lockon {
let mut theta = range.start;
while theta < range.end {
let r = E.powf(theta * angle_coefficient);
let a0 = r * theta.cos();
let b0 = r * theta.sin();
let a = a0 * exponent_coefficients[0] + best.1[0];
let b = b0 * exponent_coefficients[1] + best.1[1];
let coeffs = [a, b];
let fitness = fitness_function(coeffs);
if fitness > best.0 .0 {
best = ((fitness, [a0, b0], r), coeffs);
}
theta += advance;
}
if last_best == best.0 .0 && i != 0 {
return best.1;
}
let best_size = best.0;
exponent_coefficients[0] *= (best_size.1[0].abs() + best_size.2 / 32.).sqrt();
exponent_coefficients[1] *= (best_size.1[1].abs() + best_size.2 / 32.).sqrt();
last_best = best.0 .0;
}
best.1
}
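    /// Like [`two_variable_optimization`], but over three variables, sampling a
    /// spherical spiral with `turns` turns per sweep.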
pub fn three_variable_optimization(
fitness_function: impl Fn([f64; 3]) -> f64,
options: Options,
) -> [f64; 3] {
let Options {
exponent_coefficient,
angle_coefficient,
num_lockon,
samples_per_rotation,
range,
turns,
} = options;
let advance = TAU / samples_per_rotation;
let mut best = ((f64::MIN, [1.; 3], 1.), [0.; 3]);
let mut last_best = f64::MIN;
let mut exponent_coefficients = [exponent_coefficient; 3];
for i in 0..num_lockon {
let mut theta = range.start;
while theta < range.end {
let r = E.powf(theta * angle_coefficient);
let a0 = r * theta.sin() * (turns * theta).cos();
let b0 = r * theta.sin() * (turns * theta).sin();
let c0 = r * theta.cos();
let a = a0 * exponent_coefficients[0] + best.1[0];
let b = b0 * exponent_coefficients[1] + best.1[1];
let c = c0 * exponent_coefficients[2] + best.1[2];
let coeffs = [a, b, c];
let fitness = fitness_function(coeffs);
if fitness > best.0 .0 {
best = ((fitness, [a0, b0, c0], r), coeffs);
}
theta += advance;
}
if last_best == best.0 .0 && i != 0 {
return best.1;
}
let best_size = best.0;
exponent_coefficients[0] *= (best_size.1[0].abs() + best_size.2 / 32.).sqrt();
exponent_coefficients[1] *= (best_size.1[1].abs() + best_size.2 / 32.).sqrt();
exponent_coefficients[2] *= (best_size.1[2].abs() + best_size.2 / 32.).sqrt();
last_best = best.0 .0;
}
best.1
}
#[must_use]
#[derive(Debug, Clone, PartialEq)]
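    /// Options for the spiral optimizers.
    ///
    /// Use [`Options::new`] for presets balancing speed against precision.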
pub struct Options {
        /// The initial scale of the spiral in coefficient space.
        pub exponent_coefficient: f64,
        /// How fast the spiral's radius grows per radian: `r = e^(θ * angle_coefficient)`.
        pub angle_coefficient: f64,
        /// How many times the search re-centres on the best candidate found so far.
        pub num_lockon: usize,
        /// How many samples are taken per full rotation of the spiral.
        pub samples_per_rotation: f64,
        /// The range of angles (in radians) swept on each lock-on pass.
        pub range: Range<f64>,
        /// The number of turns of the spherical spiral; only used by the
        /// three-variable optimizer.
        pub turns: f64,
}
impl Options {
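        /// Creates a preset from a quality `level` in `1..=9`: higher levels use more
        /// lock-ons, more samples per rotation, and wider angle ranges, trading speed
        /// for precision.
        ///
        /// # Panics
        ///
        /// Panics if `level` is outside `1..=9`.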
pub fn new(level: u8) -> Self {
if !(1..=9).contains(&level) {
panic!("level of spiral::Options is out of bounds. Accepts 1..=9");
}
let level = level as usize - 1;
let num_lockon = [8, 8, 10, 12, 16, 16, 24, 32, 64];
let angle_coefficient = [0.23, 0.23, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03];
let samples_per_rotation = [15., 19., 29., 34., 38., 49., 71., 115., 217.];
let turns = [10., 12., 12., 14., 16., 16., 16., 16., 24.];
let range = [
-2.0..2.,
-2.0..4.,
-4.0..4.,
-4.0..6.,
-6.0..6.,
-6.0..6.,
-6.0..6.,
-6.0..8.,
-8.0..12.,
];
let num_lockon = num_lockon[level];
let angle_coefficient = angle_coefficient[level];
let samples_per_rotation = samples_per_rotation[level];
let turns = turns[level];
let range = range[level].clone();
let range = (range.start * TAU)..(range.end * TAU);
Self {
exponent_coefficient: 10.,
angle_coefficient,
num_lockon,
samples_per_rotation,
range,
turns,
}
}
}
impl Default for Options {
fn default() -> Self {
Self::new(5)
}
}
pub(super) struct SecondDegreePolynomial(pub(super) [f64; 3]);
impl Predictive for SecondDegreePolynomial {
fn predict_outcome(&self, predictor: f64) -> f64 {
self.0[0] + self.0[1] * predictor + self.0[2] * predictor * predictor
}
}
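    /// A [`LinearEstimator`] using spiral optimization with a user-supplied fitness
    /// function `F(coefficients, predictors, outcomes)`.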
pub struct SpiralLinear<F: Fn(&LinearCoefficients, &[f64], &[f64]) -> f64>(pub F, pub Options);
impl<F: Fn(&LinearCoefficients, &[f64], &[f64]) -> f64> LinearEstimator for SpiralLinear<F> {
fn model_linear(&self, predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
wrap_linear(two_variable_optimization(
#[inline(always)]
|model| self.0(&wrap_linear(model), predictors, outcomes),
self.1.clone(),
))
}
}
macro_rules! impl_estimator {
($(
$name:ident, $method:ident, $fn:ident, $ret:ident, $wrap:expr,
)+) => {
$(
impl $name for Options {
fn $method(&self, predictors: &[f64], outcomes: &[f64]) -> $ret {
$wrap($fn(
#[inline(always)]
|model| manhattan_distance(&$wrap(model), predictors, outcomes),
self.clone(),
))
}
}
)+
};
}
macro_rules! impl_estimator_trig {
($(
$name:ident, $method:ident, $fn:ident, $ret:ident, $wrap:expr,
)+) => {
$(
impl $name for Options {
fn $method(&self, predictors: &[f64], outcomes: &[f64], max_frequency: f64) -> $ret {
$wrap($fn(
#[inline(always)]
|model| trig_adjusted_manhattan_distance(&$wrap(model), model, predictors, outcomes, max_frequency),
self.clone(),
))
}
}
)+
};
}
impl_estimator!(
LinearEstimator,
model_linear,
two_variable_optimization,
LinearCoefficients,
wrap_linear,
PowerEstimator,
model_power,
two_variable_optimization,
PowerCoefficients,
wrap_power,
ExponentialEstimator,
model_exponential,
two_variable_optimization,
ExponentialCoefficients,
wrap_exponential,
LogisticEstimator,
model_logistic,
three_variable_optimization,
LogisticCoefficients,
wrap_logistic,
);
impl_estimator_trig!(
SineEstimator,
model_sine,
three_variable_optimization,
SineCoefficients,
SineCoefficients::wrap,
CosineEstimator,
model_cosine,
three_variable_optimization,
CosineCoefficients,
CosineCoefficients::wrap,
TangentEstimator,
model_tangent,
three_variable_optimization,
TangentCoefficients,
TangentCoefficients::wrap,
SecantEstimator,
model_secant,
three_variable_optimization,
SecantCoefficients,
SecantCoefficients::wrap,
CosecantEstimator,
model_cosecant,
three_variable_optimization,
CosecantCoefficients,
CosecantCoefficients::wrap,
CotangentEstimator,
model_cotangent,
three_variable_optimization,
CotangentCoefficients,
CotangentCoefficients::wrap,
);
impl PolynomialEstimator for Options {
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
match degree {
1 => wrap_linear(two_variable_optimization(
#[inline(always)]
|model| manhattan_distance(&wrap_linear(model), predictors, outcomes),
self.clone(),
))
.into(),
2 => three_variable_optimization(
#[inline(always)]
|model| {
manhattan_distance(&SecondDegreePolynomial(model), predictors, outcomes)
},
self.clone(),
)
.into(),
_ => panic!("unsupported degree for polynomial spiral. Supports 1,2."),
}
}
}
#[derive(Debug, Clone, PartialEq)]
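    /// A [`LogisticEstimator`] that fixes the ceiling `l` and spiral-searches only
    /// the midpoint `x0` and growth rate `k`.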
pub struct SpiralLogisticWithCeiling {
pub opts: Options,
pub ceiling: f64,
}
impl SpiralLogisticWithCeiling {
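        /// Creates a new estimator with `ceiling` as the fixed `l` coefficient.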
pub fn new(opts: Options, ceiling: f64) -> Self {
Self { opts, ceiling }
}
}
impl LogisticEstimator for SpiralLogisticWithCeiling {
fn model_logistic(&self, predictors: &[f64], outcomes: &[f64]) -> LogisticCoefficients {
fn wrap(a: [f64; 2], max: f64) -> LogisticCoefficients {
LogisticCoefficients {
x0: a[0],
l: max,
k: a[1],
}
}
wrap(
two_variable_optimization(
#[inline]
|model| manhattan_distance(&wrap(model, self.ceiling), predictors, outcomes),
self.opts.clone(),
),
self.ceiling,
)
}
}
}
#[allow(missing_docs)]
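/// Gradient descent with numeric (finite-difference) slopes: [`ParallelOptions`]
/// optimizes one coefficient at a time, while [`SimultaneousOptions`] steps all of
/// them each iteration.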
pub mod gradient_descent {
use super::*;
use utils::BorrowedPolynomial;
pub struct ParallelOptions {
pub learn_rate: f64,
pub factor_decrease: f64,
pub rough_max_sign_changes: usize,
pub rough_slope_reduction_goal: f64,
pub rough_iterations_base: usize,
pub rough_iterations_per_degree: usize,
pub fine_iterations_base: usize,
pub fine_iterations_per_degree: usize,
}
impl Default for ParallelOptions {
fn default() -> Self {
Self {
learn_rate: 50.,
factor_decrease: 1.2,
rough_max_sign_changes: 100,
rough_slope_reduction_goal: 4.,
rough_iterations_base: 64,
rough_iterations_per_degree: 13,
fine_iterations_base: 4,
fine_iterations_per_degree: 3,
}
}
}
pub struct SimultaneousOptions {
pub learn_rate: f64,
pub factor_decrease: f64,
pub factor_increase: f64,
pub target_accuracy: f64,
}
impl SimultaneousOptions {
pub fn new(target_accuracy: f64) -> Self {
Self {
learn_rate: 0.0001,
factor_decrease: 1.5,
factor_increase: 1.8,
target_accuracy,
}
}
}
impl ParallelOptions {
#[inline(always)]
fn adjusted_slope(n: f64) -> f64 {
let n = n / 8.;
let ln = match n.partial_cmp(&0.) {
Some(std::cmp::Ordering::Less) => -((-n + 1.).ln()),
Some(std::cmp::Ordering::Greater) => (n + 1.).ln(),
_ => 0.,
};
ln * 8.
}
pub fn polynomial_optimization(
&self,
n: usize,
target_accuracy: f64,
fitness_function: impl Fn(&[f64]) -> f64,
) -> Vec<f64> {
let mut values: Vec<f64> = std::iter::repeat(0.).take(n).collect();
let mut factors: Vec<f64> = std::iter::repeat(1.).take(n).collect();
let dx = (target_accuracy / 2.).max(1e-11);
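            // Finite-difference slope (f(x) - f(x + dx)) / dx: the negated derivative,
            // so adding it to `values[i]` steps downhill on the fitness function.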
let get_slope = |dx: f64, i: usize, values: &mut [f64]| {
let y1 = fitness_function(values);
values[i] += dx;
let y2 = fitness_function(values);
values[i] -= dx;
(y1 - y2) / dx
};
let rough_iterations =
self.rough_iterations_base + self.rough_iterations_per_degree * n;
let fine_iterations = self.fine_iterations_base + self.fine_iterations_per_degree * n;
for _ in 0..rough_iterations {
for i in 0..n {
let first_slope = get_slope(dx, i, &mut values);
let mut slope_positive = first_slope.is_sign_positive();
let mut factor = factors[i];
let mut sign_changes = 0;
loop {
let slope = get_slope(dx, i, &mut values);
if slope.is_sign_positive() != slope_positive {
slope_positive = !slope_positive;
factor /= self.factor_decrease;
sign_changes += 1;
}
values[i] += Self::adjusted_slope(slope) * self.learn_rate * factor;
if slope.abs() < first_slope.abs() / self.rough_slope_reduction_goal
|| sign_changes > self.rough_max_sign_changes
|| slope.abs() < target_accuracy * 2.
{
break;
}
}
factors[i] = factor;
}
}
for _ in 0..fine_iterations {
for i in 0..n {
let mut factor = factors[i];
let mut slope_positive = get_slope(dx, i, &mut values).is_sign_positive();
loop {
let slope = get_slope(dx, i, &mut values);
if slope.is_sign_positive() != slope_positive {
slope_positive = !slope_positive;
factor /= self.factor_decrease;
}
values[i] += Self::adjusted_slope(slope) * self.learn_rate * factor;
if slope.abs() < target_accuracy {
break;
}
}
factors[i] = factor;
}
}
values
}
}
impl SimultaneousOptions {
#[inline(always)]
fn adjusted_slope(n: f64) -> f64 {
let n = n / 0.1;
let ln = match n.partial_cmp(&0.) {
Some(std::cmp::Ordering::Less) => -((-n + 1.).ln()),
Some(std::cmp::Ordering::Greater) => (n + 1.).ln(),
_ => 0.,
};
ln * 0.1
}
pub fn polynomial_optimization(
&self,
n: usize,
fitness_function: impl Fn(&[f64]) -> f64,
) -> Vec<f64> {
let mut values: Vec<f64> = std::iter::repeat(0.).take(n).collect();
let mut factors: Vec<f64> = std::iter::repeat(1.).take(n).collect();
let mut slopes: Vec<f64> = std::iter::repeat(0.).take(n).collect();
let dx = 1e-11;
let get_slope = |dx: f64, i: usize, values: &mut [f64]| {
let y1 = fitness_function(values);
values[i] += dx;
let y2 = fitness_function(values);
values[i] -= dx;
(y1 - y2) / dx
};
            let mut iteration = 0_usize;
            loop {
                iteration += 1;
                if iteration > 1_000_000 {
                    break;
                }
for i in 0..n {
let slope = get_slope(dx, i, &mut values);
if slope.is_sign_positive() != slopes[i].is_sign_positive() {
if slope.abs() > slopes[i].abs() * 4.
&& self.factor_decrease * self.factor_decrease
< (slope.abs() / slopes[i].abs())
{
factors[i] /= self
.factor_decrease
.max((slope.abs() / slopes[i].abs()).sqrt());
} else {
factors[i] /= self.factor_decrease;
}
factors[i] = factors[i].max(1e-6);
} else {
factors[i] *= self.factor_increase;
}
slopes[i] = slope;
}
for i in 0..n {
let factor = factors[i];
let slope = slopes[i];
values[i] += Self::adjusted_slope(slope) * self.learn_rate * factor;
}
                if iteration % 20 == 0 {
let mut v = 0.;
let mut factor = 1.;
for slope in &slopes {
v += slope.abs() * factor;
factor /= 5.0;
}
if v < self.target_accuracy {
break;
}
}
}
values
}
}
impl PolynomialEstimator for ParallelOptions {
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
PolynomialCoefficients::from(self.polynomial_optimization(degree + 1, 1e-6, |v| {
-BorrowedPolynomial(v).determination_slice(predictors, outcomes)
}))
}
}
impl LinearEstimator for ParallelOptions {
fn model_linear(&self, predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
let v = self.polynomial_optimization(2, 1e-8, |v| {
-LinearCoefficients { k: v[0], m: v[1] }.determination_slice(predictors, outcomes)
});
LinearCoefficients { k: v[0], m: v[1] }
}
}
impl PolynomialEstimator for SimultaneousOptions {
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
PolynomialCoefficients::from(self.polynomial_optimization(degree + 1, |v| {
-BorrowedPolynomial(v).determination_slice(predictors, outcomes)
}))
}
}
impl LinearEstimator for SimultaneousOptions {
fn model_linear(&self, predictors: &[f64], outcomes: &[f64]) -> LinearCoefficients {
let v = self.polynomial_optimization(2, |v| {
-LinearCoefficients { k: v[0], m: v[1] }.determination_slice(predictors, outcomes)
});
LinearCoefficients { k: v[0], m: v[1] }
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn one_variable() {
let now = std::time::Instant::now();
let v = ParallelOptions::default()
.polynomial_optimization(1, 1e-12, |values| (values[0] - 42.4242).powi(2));
println!("{v:?} {:?}", now.elapsed());
}
#[test]
fn two_variable_regression() {
let now = std::time::Instant::now();
let x = [1.3, 4.7, 9.4];
let y = [4., 5.3, 6.7];
let v = ParallelOptions::default().polynomial_optimization(2, 1e-6, |values| {
-LinearCoefficients {
k: values[0],
m: values[1],
}
.determination_slice(&x, &y)
});
let coeffs = LinearCoefficients { k: v[0], m: v[1] };
println!(
"{coeffs} R² {} {:?}",
coeffs.determination_slice(&x, &y),
now.elapsed()
);
}
}
}
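/// Optimization and regression through a binary search over the magnitude
/// of each variable, optionally with random perturbations.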
pub mod binary_search {
use super::*;
#[cfg(feature = "binary_search_rng")]
use rand::Rng;
use std::borrow::Cow;
/// Storage of a fixed or dynamic number of variables to optimize.
///
/// Implemented for `[f64; LENGTH]` and `Vec<f64>`.
#[allow(clippy::len_without_is_empty)]
pub trait NVariableStorage:
std::ops::IndexMut<usize, Output = f64> + AsRef<[f64]> + AsMut<[f64]> + Clone
{
/// Construction data, e.g. the number of variables for `Vec<f64>`.
type Data;
/// The borrowed view handed to the fitness function.
type Given<'a>: AsRef<[f64]>
where
Self: 'a;
/// Creates a new storage, filled with `num`.
fn new_filled(data: &Self::Data, num: f64) -> Self;
/// Borrows the current values.
fn borrow(&self) -> Self::Given<'_>;
}
impl<const LENGTH: usize> NVariableStorage for [f64; LENGTH] {
type Data = ();
type Given<'a> = [f64; LENGTH];
fn new_filled(_data: &(), num: f64) -> Self {
[num; LENGTH]
}
fn borrow(&self) -> Self::Given<'_> {
*self
}
}
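/// The length of dynamically sized ([`Vec`]-backed) variable storage.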
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VariableLengthStorage(pub usize);
impl NVariableStorage for Vec<f64> {
type Data = VariableLengthStorage;
type Given<'a> = &'a [f64];
fn new_filled(data: &Self::Data, num: f64) -> Self {
vec![num; data.0]
}
fn borrow(&self) -> Self::Given<'_> {
self
}
}
impl From<usize> for VariableLengthStorage {
fn from(n: usize) -> Self {
Self(n)
}
}
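// Repeated square roots of `f64::MAX`: entry `i` holds
// `f64::MAX^(1 / 2^(i + 2))`, mirroring the `Cow::Owned` computation in the
// optimization functions below for an arbitrary `max`. The last value is
// repeated so every factor stays strictly above 1.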
static SQRTS_FROM_F64_MAX: [f64; 61] = [
1.157920892373162e77,
3.402823669209385e38,
1.8446744073709552e19,
4294967296.0,
65536.0,
256.0,
16.0,
4.0,
2.0,
core::f64::consts::SQRT_2,
1.189207115002721,
1.0905077326652577,
1.0442737824274138,
1.0218971486541166,
1.0108892860517005,
1.0054299011128027,
1.0027112750502025,
1.0013547198921082,
1.0006771306930664,
1.0003385080526823,
1.0001692397053021,
1.0000846162726944,
1.0000423072413958,
1.0000211533969647,
1.0000105766425498,
1.0000052883072919,
1.0000026441501502,
1.0000013220742012,
1.0000006610368821,
1.0000003305183864,
1.0000001652591795,
1.0000000826295863,
1.0000000413147923,
1.000000020657396,
1.000000010328698,
1.000000005164349,
1.0000000025821745,
1.0000000012910872,
1.0000000006455436,
1.0000000003227718,
1.0000000001613858,
1.000000000080693,
1.0000000000403464,
1.0000000000201732,
1.0000000000100866,
1.0000000000050433,
1.0000000000025218,
1.0000000000012608,
1.0000000000006304,
1.0000000000003153,
1.0000000000001577,
1.0000000000000788,
1.0000000000000393,
1.0000000000000198,
1.0000000000000098,
1.0000000000000049,
1.0000000000000024,
1.0000000000000013,
1.0000000000000007,
1.0000000000000002,
1.0000000000000002,
];
/// Options for the binary search optimizer.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Options {
/// The number of optimization passes over all variables.
pub iterations: usize,
/// The number of binary search steps per variable and pass (at most 61).
pub precision: usize,
/// The maximum magnitude of the searched values.
pub max: f64,
/// Scales the random jitter applied to the search factors.
#[cfg(feature = "binary_search_rng")]
pub randomness_factor: f64,
/// If set, regress on random subsets of large data sets instead of all
/// the points. See [`random_subset_regression`].
#[cfg(feature = "random_subset_regression")]
pub random_subset_regression: Option<random_subset_regression::Config>,
}
impl Default for Options {
fn default() -> Self {
Self {
iterations: 30,
precision: 30,
max: f64::MAX,
#[cfg(feature = "binary_search_rng")]
randomness_factor: 1.,
#[cfg(feature = "random_subset_regression")]
random_subset_regression: Some(Default::default()),
}
}
}
impl Options {
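/// Returns a copy of these options with `precision` set to the maximum
/// useful value (61, the length of the precomputed factor table).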
pub fn max_precision(&self) -> Self {
let mut me = *self;
me.precision = 61;
me
}
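/// Optimizes the variables in `NV` to minimize `fitness_function`, without
/// any randomness. Lower fitness is considered better.
///
/// `data` is the construction data for the storage: `()` for `[f64; LENGTH]`
/// and a [`VariableLengthStorage`] (the length) for `Vec<f64>`.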
pub fn n_variable_optimization_no_rng<NV: NVariableStorage>(
&self,
fitness_function: impl Fn(NV::Given<'_>) -> f64,
data: NV::Data,
) -> NV {
let initial_center = self.max.sqrt();
let mut values = NV::new_filled(&data, 0.);
let factors = if self.max == f64::MAX {
Cow::Borrowed(
&SQRTS_FROM_F64_MAX[0..(self.precision.min(SQRTS_FROM_F64_MAX.len()))],
)
} else {
let mut f = initial_center;
Cow::Owned(
(0..self.precision.min(61))
.map(|_| {
f = f.sqrt();
f
})
.collect::<Vec<_>>(),
)
};
let n = values.as_ref().len();
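// For every variable, walk down the factor table: probe the fitness just
// above and below the current center (and mirrored onto the opposite
// sign), keep the better side, and tighten the step with the next
// square-root factor.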
for _ in 0..self.iterations {
for i in 0..n {
let mut center = initial_center;
for factor in factors.as_ref() {
let center_over = center * factor;
let center_under = center / factor;
let value_over = center_over - 1.0;
let value_under = center_under - 1.0;
let value_negative = -value_under;
values[i] = value_negative;
let fitness_negative = fitness_function(values.borrow());
values[i] = value_over;
let fitness_over = fitness_function(values.borrow());
values[i] = value_under;
let fitness_under = fitness_function(values.borrow());
let best_current_sign = fitness_over.min(fitness_under);
if fitness_negative < best_current_sign {
values[i] = -value_over;
let fitness_negative_over = fitness_function(values.borrow());
values[i] = -value_under;
let fitness_negative_under = fitness_function(values.borrow());
let best_opposite_sign =
fitness_negative_over.min(fitness_negative_under);
if best_opposite_sign < best_current_sign {
if fitness_negative_under < fitness_negative_over {
center = -center_under;
} else {
center = -center_over;
values[i] = -value_over;
}
continue;
}
}
if !fitness_over.is_finite() || fitness_under < fitness_over {
center = center_under;
} else {
center = center_over;
values[i] = value_over;
}
}
}
}
values
}
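/// Optimizes the variables in `NV` to minimize `fitness_function`, jittering
/// the search factors with `rng`. The jitter shrinks linearly over the
/// iterations, and the best values seen across all iterations are returned.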
#[cfg(feature = "binary_search_rng")]
pub fn n_variable_optimization<NV: NVariableStorage>(
&self,
fitness_function: impl Fn(NV::Given<'_>) -> f64,
data: NV::Data,
rng: &mut impl Rng,
) -> NV {
let initial_center = self.max.sqrt();
let mut values = NV::new_filled(&data, 0.);
let factors = if self.max == f64::MAX {
Cow::Borrowed(
&SQRTS_FROM_F64_MAX[0..(self.precision.min(SQRTS_FROM_F64_MAX.len()))],
)
} else {
let mut f = initial_center;
Cow::Owned(
(0..self.precision.min(61))
.map(|_| {
f = f.sqrt();
f
})
.collect::<Vec<_>>(),
)
};
let mut best_fitness = f64::MAX;
let mut best_values = values.clone();
let n = values.as_ref().len();
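// `progress` starts at 1 and falls towards 0, so early passes explore
// widely while later passes refine. Since the jitter can make a pass
// worse, the best values seen so far are kept separately.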
for iter in 0..self.iterations {
let progress = 1.0 - iter as f64 / self.iterations as f64;
let rng_factor =
1. + (2.0 * rng.gen::<f32>() as f64 - 1.) * self.randomness_factor * progress;
for i in 0..n {
let mut center = initial_center;
for factor in factors.as_ref() {
let factor = factor * rng_factor;
let center_over = center * factor;
let center_under = center / factor;
let value_over = center_over - 1.0;
let value_under = center_under - 1.0;
let value_negative = -value_under;
values[i] = value_negative;
let fitness_negative = fitness_function(values.borrow());
values[i] = value_over;
let fitness_over = fitness_function(values.borrow());
values[i] = value_under;
let fitness_under = fitness_function(values.borrow());
let best_current_sign = fitness_under.min(fitness_over);
if fitness_negative < best_current_sign {
values[i] = -value_over;
let fitness_negative_over = fitness_function(values.borrow());
values[i] = -value_under;
let fitness_negative_under = fitness_function(values.borrow());
let best_opposite_sign =
fitness_negative_over.min(fitness_negative_under);
if best_opposite_sign < best_current_sign {
if fitness_negative_under < fitness_negative_over {
center = -center_under;
} else {
center = -center_over;
values[i] = -value_over;
}
continue;
}
}
if !fitness_over.is_finite() || fitness_under < fitness_over {
center = center_under;
} else {
center = center_over;
values[i] = value_over;
}
}
}
let fitness = fitness_function(values.borrow());
if fitness < best_fitness {
best_values.as_mut().copy_from_slice(values.as_ref());
best_fitness = fitness;
}
}
best_values
}
}
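// Implements the estimator traits for `Options` by minimizing the manhattan
// distance between the model and the data, optionally on random subsets.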
#[cfg(feature = "binary_search_rng")]
macro_rules! impl_estimator {
($(
$name:ident, $method:ident, $ret:ident, $wrap:expr,
)+) => {
$(
impl $name for Options {
fn $method(&self, predictors: &[f64], outcomes: &[f64]) -> $ret {
use rand::SeedableRng;
let mut rng = rand_xorshift::XorShiftRng::from_rng(rand::thread_rng()).unwrap();
#[cfg(feature = "random_subset_regression")]
if let Some(random_config) = &self.random_subset_regression {
let subsets =
random_subset_regression::Subsets::new(
predictors,
outcomes,
random_config,
&mut rng
);
if let Some(subsets) = subsets {
return $wrap(self.n_variable_optimization(
|model| {
let (predictors, outcomes) = subsets.next_subset();
-utils::manhattan_distance(
&$wrap(model),
predictors,
outcomes,
)
},
(),
&mut rng,
));
}
}
$wrap(self.n_variable_optimization(
#[inline(always)]
|model| -utils::manhattan_distance(&$wrap(model), predictors, outcomes),
(),
&mut rng,
))
}
}
)+
};
}
#[cfg(feature = "binary_search_rng")]
macro_rules! impl_estimator_trig {
($(
$name:ident, $method:ident, $ret:ident, $wrap:expr,
)+) => {
$(
impl $name for Options {
fn $method(&self, predictors: &[f64], outcomes: &[f64], max_frequency: f64) -> $ret {
use rand::SeedableRng;
let mut rng = rand_xorshift::XorShiftRng::from_rng(rand::thread_rng()).unwrap();
#[cfg(feature = "random_subset_regression")]
if let Some(random_config) = &self.random_subset_regression {
let subsets =
random_subset_regression::Subsets::new(
predictors,
outcomes,
random_config,
&mut rng
);
if let Some(subsets) = subsets {
return $wrap(self.n_variable_optimization(
|model| {
let (predictors, outcomes) = subsets.next_subset();
-utils::trig_adjusted_manhattan_distance(
&$wrap(model),
model,
predictors,
outcomes,
max_frequency,
)
},
(),
&mut rng,
));
}
}
$wrap(self.n_variable_optimization(
#[inline(always)]
|model| {
-utils::trig_adjusted_manhattan_distance(
&$wrap(model),
model,
predictors,
outcomes,
max_frequency
)
},
(),
&mut rng,
))
}
}
)+
};
}
#[cfg(feature = "binary_search_rng")]
impl_estimator!(
LinearEstimator,
model_linear,
LinearCoefficients,
utils::wrap_linear,
PowerEstimator,
model_power,
PowerCoefficients,
utils::wrap_power,
ExponentialEstimator,
model_exponential,
ExponentialCoefficients,
utils::wrap_exponential,
LogisticEstimator,
model_logistic,
LogisticCoefficients,
utils::wrap_logistic,
);
#[cfg(feature = "binary_search_rng")]
impl_estimator_trig!(
SineEstimator,
model_sine,
SineCoefficients,
SineCoefficients::wrap,
CosineEstimator,
model_cosine,
CosineCoefficients,
CosineCoefficients::wrap,
TangentEstimator,
model_tangent,
TangentCoefficients,
TangentCoefficients::wrap,
SecantEstimator,
model_secant,
SecantCoefficients,
SecantCoefficients::wrap,
CosecantEstimator,
model_cosecant,
CosecantCoefficients,
CosecantCoefficients::wrap,
CotangentEstimator,
model_cotangent,
CotangentCoefficients,
CotangentCoefficients::wrap,
);
#[cfg(feature = "binary_search_rng")]
impl PolynomialEstimator for Options {
fn model_polynomial(
&self,
predictors: &[f64],
outcomes: &[f64],
degree: usize,
) -> PolynomialCoefficients {
use rand::SeedableRng;
let mut rng = rand_xorshift::XorShiftRng::from_rng(rand::thread_rng()).unwrap();
#[cfg(feature = "random_subset_regression")]
if let Some(random_config) = &self.random_subset_regression {
let subsets = random_subset_regression::Subsets::new(
predictors,
outcomes,
random_config,
&mut rng,
);
if let Some(subsets) = subsets {
return PolynomialCoefficients {
coefficients: (self.n_variable_optimization(
|model| {
let (predictors, outcomes) = subsets.next_subset();
-utils::manhattan_distance(
&utils::BorrowedPolynomial(model),
predictors,
outcomes,
)
},
(degree + 1).into(),
&mut rng,
)),
};
}
}
PolynomialCoefficients {
coefficients: (self.n_variable_optimization(
#[inline(always)]
|model| {
-utils::manhattan_distance(
&utils::BorrowedPolynomial(model),
predictors,
outcomes,
)
},
(degree + 1).into(),
&mut rng,
)),
}
}
}
#[cfg(test)]
mod tests {
use super::super::*;
use super::*;
#[test]
fn one_variable_regression() {
let now = std::time::Instant::now();
let values = super::Options::default()
.max_precision()
.n_variable_optimization_no_rng::<[f64; 1]>(
|s| (s[0] - 42.42424242424242).abs(),
(),
);
println!("{values:?} {:?}", now.elapsed());
}
#[test]
#[cfg(feature = "binary_search_rng")]
fn two_variable_regression() {
let mut rng = rand::thread_rng();
let now = std::time::Instant::now();
let x = [1.3, 4.7, 9.4];
let y = [4., 5.3, 6.7];
let v = Options::default().n_variable_optimization::<[f64; 2]>(
|values| {
-utils::manhattan_distance(
&LinearCoefficients {
k: values[0],
m: values[1],
},
&x,
&y,
)
},
(),
&mut rng,
);
let coeffs = LinearCoefficients { k: v[0], m: v[1] };
println!(
"{coeffs} R² {} {:?}",
coeffs.determination_slice(&x, &y),
now.elapsed()
);
}
#[test]
#[cfg(feature = "binary_search_rng")]
fn second_degree_regression() {
let _rng = rand::thread_rng();
let now = std::time::Instant::now();
let x = [1.3, 4.7, 9.4];
let y = [4., 5.3, 6.7];
let coeffs = Options::default().model_polynomial(&x, &y, 2);
println!(
"{coeffs} R² {} {:?}",
coeffs.determination_slice(&x, &y),
now.elapsed()
);
}
#[test]
#[cfg(feature = "binary_search_rng")]
fn two_variable_optimization() {
use rand::SeedableRng;
let mut rng = rand_xorshift::XorShiftRng::from_rng(rand::thread_rng()).unwrap();
let now = std::time::Instant::now();
let coeffs = Options::default()
.max_precision()
.n_variable_optimization::<[f64; 2]>(
|[v1, v2]| (v1 - 5.959).abs() + (v2 - (-234.234)).abs(),
(),
&mut rng,
);
println!("{coeffs:?} {:?}", now.elapsed());
}
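// A sketch exercising the dynamically sized (`Vec`-backed) storage and the
// `From<usize>` conversion for `VariableLengthStorage`; the target values
// are arbitrary.
#[test]
fn variable_length_optimization() {
let now = std::time::Instant::now();
let values = Options::default().n_variable_optimization_no_rng::<Vec<f64>>(
|s: &[f64]| (s[0] - 1.5).abs() + (s[1] + 24.24).abs() + (s[2] - 0.3).abs(),
3.into(),
);
println!("{values:?} {:?}", now.elapsed());
}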
}
}
#[cfg(feature = "random_subset_regression")]
#[allow(dead_code)] pub mod random_subset_regression {
use rand::prelude::Distribution;
use rand::Rng;
/// Configuration of the random subsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
/// The number of points in each subset.
pub subset_length: usize,
/// Subsets are only used when the data contains at least
/// `subset_length * minimum_factor_of_length` points.
pub minimum_factor_of_length: usize,
/// The number of subsets to cycle through.
pub subsets_count: usize,
}
impl Default for Config {
fn default() -> Self {
Self {
subset_length: 100,
minimum_factor_of_length: 4,
subsets_count: 8,
}
}
}
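/// Pre-drawn random subsets of the data, cycled through with `next_subset`.
/// Interior mutability lets the immutable fitness closures advance the cycle.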
pub(crate) struct Subsets {
subsets: Vec<(Vec<f64>, Vec<f64>)>,
i: std::rc::Rc<std::cell::RefCell<usize>>,
}
impl Subsets {
pub(crate) fn new(
x: &[f64],
y: &[f64],
config: &Config,
rng: &mut impl Rng,
) -> Option<Self> {
if x.len() != y.len() {
return None;
}
if x.len() < config.subset_length * config.minimum_factor_of_length {
return None;
}
if config.minimum_factor_of_length < 2 {
eprintln!("random_subset_regression is disabled: `minimum_factor_of_length` is less than 2");
return None;
}
if config.subsets_count < 2 {
eprintln!("random_subset_regression is disabled: `subsets_count` is less than 2");
return None;
}
let distribution = rand::distributions::Uniform::new(0, x.len());
let subsets = (0..config.subsets_count)
.map(|_| {
let mut new_x = Vec::with_capacity(config.subset_length);
let mut new_y = Vec::with_capacity(config.subset_length);
for _ in 0..config.subset_length {
let idx = distribution.sample(rng);
new_x.push(x[idx]);
new_y.push(y[idx]);
}
(new_x, new_y)
})
.collect();
Some(Self {
subsets,
i: std::rc::Rc::new(std::cell::RefCell::new(0)),
})
}
pub(crate) fn next_subset(&self) -> (&[f64], &[f64]) {
// Cycle through the subsets, wrapping around at the end.
let index = *self.i.borrow();
let (predictors, outcomes) = &self.subsets[index];
*self.i.borrow_mut() = (index + 1) % self.subsets.len();
(predictors, outcomes)
}
}
}
mod utils {
use super::*;
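/// Returns the negated sum of absolute residuals, so higher is better.
/// Call sites negate it again to get a value for the optimizers to minimize.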
#[inline(always)]
pub(crate) fn manhattan_distance(
model: &impl Predictive,
predictors: &[f64],
outcomes: &[f64],
) -> f64 {
let mut error = 0.;
for (predictor, outcome) in predictors.iter().copied().zip(outcomes.iter().copied()) {
let predicted = model.predict_outcome(predictor);
let length = (predicted - outcome).abs();
error += length;
}
-error
}
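/// Like `manhattan_distance`, but penalizes invalid trigonometric
/// parameters (any negative parameter, or a frequency (`params[1]`) above
/// `max_frequency`) by scaling the already-negative distance by 10.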
pub(super) fn trig_adjusted_manhattan_distance(
model: &impl Predictive,
params: [f64; 3],
predictors: &[f64],
outcomes: &[f64],
max_frequency: f64,
) -> f64 {
let mut base = manhattan_distance(model, predictors, outcomes);
if params[0].is_sign_negative()
|| params[1].is_sign_negative()
|| params[2].is_sign_negative()
{
base *= 10.;
}
if params[1] > max_frequency {
base *= 10.;
}
base
}
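// Adapters from the optimizers' flat parameter arrays to the typed
// coefficient structs.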
#[inline(always)]
pub(super) fn wrap_linear(a: [f64; 2]) -> LinearCoefficients {
LinearCoefficients { k: a[1], m: a[0] }
}
#[inline(always)]
pub(super) fn wrap_power(a: [f64; 2]) -> PowerCoefficients {
PowerCoefficients {
e: a[1],
k: a[0],
predictor_additive: 0.,
outcome_additive: 0.,
}
}
#[inline(always)]
pub(super) fn wrap_exponential(a: [f64; 2]) -> ExponentialCoefficients {
ExponentialCoefficients {
b: a[1],
k: a[0],
predictor_additive: 0.,
outcome_additive: 0.,
}
}
#[inline(always)]
pub(super) fn wrap_logistic(a: [f64; 3]) -> LogisticCoefficients {
LogisticCoefficients {
x0: a[0],
l: a[1],
k: a[2],
}
}
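// Polynomial evaluation over a borrowed coefficient slice (lowest degree
// first): small degrees are unrolled, higher degrees accumulate powers of
// the predictor term by term.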
pub(super) struct BorrowedPolynomial<'a>(pub(super) &'a [f64]);
impl<'a> Predictive for BorrowedPolynomial<'a> {
#[inline(always)]
fn predict_outcome(&self, predictor: f64) -> f64 {
match self.0.len() {
0 => 0.,
1 => self.0[0],
2 => self.0[1] * predictor + self.0[0],
3 => self.0[2] * predictor * predictor + self.0[1] * predictor + self.0[0],
4 => {
let p2 = predictor * predictor;
self.0[3] * p2 * predictor + self.0[2] * p2 + self.0[1] * predictor + self.0[0]
}
_ => {
let mut out = 0.0;
let mut pred = 1.;
for coefficient in self.0.iter().copied() {
out += pred * coefficient;
pred *= predictor;
}
out
}
}
}
}
}