pub use self::gamma::{ln_gamma, gamma, lower_incomplete_gamma, upper_incomplete_gamma, digamma};
pub use self::beta::{ln_beta, beta, incomplete_beta, regularized_incomplete_beta, inverse_regularized_incomplete_beta, multivariate_beta};
pub use self::autocorrelation::{autocorrelation, Autocorrelation, autocorrelation_array, PartialAutocorrelation, partial_autocorrelation};
pub use self::discrete_distributions::{BernoulliDistribution, BinomialDistribution, DiscreteUniformDistribution, PoissonDistribution};
pub use self::continuous_distributions::{
ContinuousUniformDistribution, NormalDistribution, ExponentialDistribution, LaplaceDistribution, GammaDistribution, StudentsTDistribution,
ParetoDistribution, LogNormalDistribution, FDistribution,
};
pub use self::stat_tests::{
ConfidenceLevel, CointegrationResult, cointegration,
ADFType, ADFResult, adf,
TTestResult, TTestTwoSampleCase, t_test_two_sample, t_test_lr,
nested_f_statistic, FTestResult, f_test_anova,
GrangerCausalityTestType, GrangerCausalityRejectReason, GrangerCausalityResult, GrangerCausalitySettings,
granger_causality_test, simple_granger_causality_test,
};
pub use self::linear_regression_analysis::{LinearRegressionFeatureResult, LinearRegressionResult, LinearRegressionSettings, LinearRegressionAnalysis};
pub use self::mediation_analysis::{
BKMediationAnalysisStep, BKMediationAnalysisFinalStep, BKMediationAnalysisResult, BaronKennyMeriationAnalysis, SobelTestResult, sobel_test,
MJYAnalysisStep, MJYAnalysisFinalStep, MJYAnalysisResult, MullerJuddYzerbytAnalysis,
};
pub use self::arima::{
ARResult, AROrderMethod, ARSettings, AR,
};
mod gamma;
mod beta;
pub mod autocorrelation;
pub mod continuous_distributions;
pub mod discrete_distributions;
pub mod stat_tests;
mod linear_regression_analysis;
pub mod mediation_analysis;
pub mod arima;
use std::borrow::Borrow;
use ndarray::{Array1, Array2};
use nalgebra::DMatrix;
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
use crate::error::{DigiFiError, ErrorTitle};
use crate::utilities::{
compare_len, MatrixConversion, FeatureCollection,
maths_utils::factorial,
loss_functions::{LossFunction, SSE},
};
/// Classifies a probability distribution by the nature of its support.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ProbabilityDistributionType {
    /// Distribution defined over a countable set of outcomes.
    Discrete,
    /// Distribution defined over a continuous range of outcomes.
    Continuous,
}
pub trait ProbabilityDistribution {
fn distribution_type() -> ProbabilityDistributionType;
fn mean(&self) -> f64;
fn median(&self) -> Vec<f64>;
fn mode(&self) -> Vec<f64>;
fn variance(&self) -> f64;
fn skewness(&self) -> f64;
fn excess_kurtosis(&self) -> f64;
fn entropy(&self) -> Result<f64, DigiFiError>;
fn pdf(&self, x: f64) -> Result<f64, DigiFiError>;
fn cdf(&self, x: f64) -> Result<f64, DigiFiError>;
fn inverse_cdf(&self, p: f64) -> Result<f64, DigiFiError>;
fn mgf(&self, t: usize) -> f64;
fn pdf_iter<T, I>(&self, x: T) -> Result<Array1<f64>, DigiFiError>
where
T: Iterator<Item = I> + ExactSizeIterator,
I: Borrow<f64>,
{
x.map(|x_| self.pdf(*x_.borrow()) ).collect()
}
fn cdf_iter<T, I>(&self, x: T) -> Result<Array1<f64>, DigiFiError>
where
T: Iterator<Item = I> + ExactSizeIterator,
I: Borrow<f64>,
{
x.map(|x_| self.cdf(*x_.borrow()) ).collect()
}
fn inverse_cdf_iter<T, I>(&self, p: T) -> Result<Array1<f64>, DigiFiError>
where
T: Iterator<Item = I> + ExactSizeIterator,
I: Borrow<f64>,
{
p.map(|p_| self.inverse_cdf(*p_.borrow()) ).collect()
}
fn mgf_iter<T, I>(&self, t: T) -> Array1<f64>
where
T: Iterator<Item = I>,
I: Borrow<usize>
{
t.map(|t_| self.mgf(*t_.borrow()) ).collect()
}
}
pub trait RiskMeasure: ProbabilityDistribution + ErrorTitle {
fn validate_alpha(&self, alpha: f64) -> Result<(), DigiFiError> {
if (alpha < 0.0) || (1.0 < alpha) {
return Err(DigiFiError::ParameterConstraint {
title: Self::error_title(),
constraint: "The argument `alpha` must be in the range `[0, 1]`.".to_owned(),
});
}
Ok(())
}
fn value_at_risk(&self, alpha: f64) -> Result<f64, DigiFiError> {
self.validate_alpha(alpha)?;
Ok(self.inverse_cdf(alpha)?)
}
fn expected_shortfall(&self, alpha: f64) -> Result<f64, DigiFiError>;
fn value_at_risk_iter<T, I>(&self, alphas: T) -> Result<Array1<f64>, DigiFiError>
where
T: Iterator<Item = I>,
I: Borrow<f64>,
{
alphas.map(|alpha| self.value_at_risk(*alpha.borrow()) ).collect()
}
fn expected_shortfall_iter<T, I>(&self, alphas: T) -> Result<Array1<f64>, DigiFiError>
where
T: Iterator<Item = I>,
I: Borrow<f64>,
{
alphas.map(|alpha| self.expected_shortfall(*alpha.borrow()) ).collect()
}
}
/// Computes the covariance between two arrays of equal length:
/// `cov = Σ (x - x̄)(y - ȳ) / (n - ddof)`.
///
/// # Input
/// - `array_1`, `array_2`: Data arrays (must have the same length).
/// - `ddof`: Delta degrees of freedom (`0` for population covariance, `1` for sample covariance).
///
/// # Errors
/// - `array_1` and `array_2` have different lengths.
/// - The mean of either array cannot be computed (e.g., empty array).
/// - `ddof >= array length`: previously this underflowed the `usize` subtraction
///   (panic in debug builds, wrap-around in release) or divided by zero.
pub fn covariance(array_1: &Array1<f64>, array_2: &Array1<f64>, ddof: usize) -> Result<f64, DigiFiError> {
    let error_title: String = String::from("Covariance");
    compare_len(&array_1.iter(), &array_2.iter(), "array_1", "array_2")?;
    let array_1_mean: f64 = array_1.mean()
        .ok_or_else(|| DigiFiError::MeanCalculation { title: error_title.clone(), series: "array_1".to_owned(), })?;
    let array_2_mean: f64 = array_2.mean()
        .ok_or_else(|| DigiFiError::MeanCalculation { title: error_title.clone(), series: "array_2".to_owned(), })?;
    // `len - ddof` must be a positive integer: `checked_sub` prevents a usize underflow
    // and the `filter` rejects a zero denominator.
    let denominator: usize = array_1.len().checked_sub(ddof)
        .filter(|v| *v != 0)
        .ok_or(DigiFiError::ValidationError {
            title: error_title,
            details: "The argument `ddof` must be smaller than the length of the arrays.".to_owned(),
        })?;
    let sum_of_products: f64 = array_1.iter()
        .zip(array_2.iter())
        .fold(0.0, |total, (x, y)| total + (x - array_1_mean) * (y - array_2_mean) );
    Ok(sum_of_products / denominator as f64)
}
/// Computes the skewness of an array as the third standardized central moment:
/// `(m3 / n) / (m2 / n)^(3/2)`.
///
/// # Errors
/// - `array` has fewer than two elements.
/// - The mean of `array` cannot be computed.
pub fn skewness(array: &Array1<f64>) -> Result<f64, DigiFiError> {
    let error_title: String = String::from("Skewness");
    if array.len() < 2 {
        return Err(DigiFiError::ValidationError { title: error_title, details: "The length of `array` must be at least `2`.".to_owned(), });
    }
    let mean: f64 = array.mean()
        .ok_or_else(|| DigiFiError::MeanCalculation { title: error_title, series: "array".to_owned(), })?;
    // Accumulate the third and second central moments in a single pass.
    let mut m3: f64 = 0.0;
    let mut m2: f64 = 0.0;
    for v in array.iter() {
        m3 += (v - mean).powi(3);
        m2 += (v - mean).powi(2);
    }
    let n: f64 = array.len() as f64;
    Ok((m3 / n) / (m2 / n).powf(3.0 / 2.0))
}
/// Computes the kurtosis of an array as the fourth standardized central moment:
/// `(m4 / n) / (m2 / n)^2`.
///
/// Bug fix: the numerator previously accumulated squared deviations (`powi(2)`,
/// an apparent copy-paste from `skewness`), which made the function return
/// `1 / variance` instead of kurtosis. It now accumulates fourth powers.
///
/// # Errors
/// - `array` has fewer than two elements.
/// - The mean of `array` cannot be computed.
pub fn kurtosis(array: &Array1<f64>) -> Result<f64, DigiFiError> {
    let error_title: String = String::from("Kurtosis");
    if array.len() <= 1 {
        return Err(DigiFiError::ValidationError { title: error_title, details: "The length of `array` must be at least `2`.".to_owned(), });
    }
    let mean: f64 = match array.mean() {
        Some(v) => v,
        None => return Err(DigiFiError::MeanCalculation { title: error_title, series: "array".to_owned(), }),
    };
    // Fourth central moment in the numerator, second central moment in the denominator.
    let (num, denom) = array.iter().fold((0.0, 0.0), |(num, denom), v| (num + (v - mean).powi(4), denom + (v - mean).powi(2)) );
    let array_len: f64 = array.len() as f64;
    Ok((num / array_len) / (denom / array_len).powi(2))
}
/// Rescales a standard deviation across sampling frequencies using the
/// square-root-of-time rule: the result is `std * sqrt(n)`.
pub fn change_frequency_std(std: f64, n: f64) -> f64 {
    f64::sqrt(n) * std
}
/// Computes the Pearson correlation coefficient: covariance of the two arrays
/// divided by the product of their standard deviations (both using `ddof`).
///
/// # Errors
/// - Propagates any error from `covariance` (e.g., mismatched lengths).
pub fn pearson_correlation(array_1: &Array1<f64>, array_2: &Array1<f64>, ddof: usize) -> Result<f64, DigiFiError> {
    let numerator: f64 = covariance(array_1, array_2, ddof)?;
    let delta: f64 = ddof as f64;
    let denominator: f64 = array_1.std(delta) * array_2.std(delta);
    Ok(numerator / denominator)
}
/// Computes the binomial coefficient `C(n, r)`: the number of ways to choose
/// `r` items out of `n` without regard to order.
///
/// Uses the multiplicative formula `C(n, r) = Π_{i=1..r} (n - r + i) / i`
/// instead of full factorials: `factorial(n)` overflows `u128` for `n > 34`,
/// whereas this form only overflows when the result itself does not fit in `u128`.
/// Results are identical to the factorial formula for all inputs that previously worked.
///
/// # Errors
/// - `n` is smaller than `r`.
pub fn n_choose_r(n: u128, r: u128) -> Result<u128, DigiFiError> {
    if n < r {
        return Err(DigiFiError::ParameterConstraint {
            title: "n Choose r".to_owned(),
            constraint: "The value of variable n must be larger or equal to the value of variable r.".to_owned(),
        });
    }
    // Symmetry C(n, r) = C(n, n - r): iterate over the smaller of the two.
    let r: u128 = r.min(n - r);
    let mut result: u128 = 1;
    for i in 1..=r {
        // The division is exact at every step: before this iteration `result`
        // equals C(n - r + i - 1, i - 1), so `result * (n - r + i)` is divisible by `i`.
        result = result * (n - r + i) / i;
    }
    Ok(result)
}
/// Solves ordinary least squares via the normal equations:
/// `beta = (XᵀX)⁻¹ Xᵀ y`.
///
/// # Errors
/// - The number of rows of `x` does not match the length of `y`.
/// - `XᵀX` is singular (no inverse exists).
pub fn linear_regression(x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>, DigiFiError> {
    if x.dim().0 != y.len() {
        return Err(DigiFiError::UnmatchingLength { array_1: "x".to_owned(), array_2: "y".to_owned(), });
    }
    let xtx: Array2<f64> = x.t().dot(x);
    // Invert XᵀX through nalgebra, since ndarray has no built-in inverse.
    let xtx_na: DMatrix<f64> = MatrixConversion::ndarray_to_nalgebra(xtx);
    let inverse_na: DMatrix<f64> = match xtx_na.try_inverse() {
        Some(m) => m,
        None => return Err(DigiFiError::Other { title: "Linear Regression".to_owned(), details: "No matrix inverse exists to perform linear regression.".to_owned(), }),
    };
    let xtx_inv: Array2<f64> = MatrixConversion::nalgebra_to_ndarray(inverse_na)?;
    Ok(xtx_inv.dot(&x.t().dot(&y.t())))
}
/// Computes the standard error of a simple linear-regression coefficient.
///
/// Estimates the residual variance as `SSE / (n - ddof)` and divides it by the
/// sum of squared deviations of `x` from its mean, returning the square root.
///
/// # Errors
/// - Lengths of `x`, `y` and `y_prediction` do not match.
/// - `y` has fewer data points than `ddof`.
/// - The mean of `x` cannot be computed.
pub fn se_lr_coefficient(y: &Array1<f64>, y_prediction: &Array1<f64>, x: &Array1<f64>, ddof: usize) -> Result<f64, DigiFiError> {
    let error_title: String = String::from("Standard Error of Linear Regression Coefficient");
    compare_len(&x.iter(), &y_prediction.iter(), "x", "y_prediction")?;
    compare_len(&x.iter(), &y.iter(), "x", "y")?;
    let loss_function: SSE = SSE;
    // `checked_sub` guards against `ddof` exceeding the number of observations.
    let dof: f64 = match y.len().checked_sub(ddof) {
        Some(v) => v as f64,
        None => return Err(DigiFiError::Other { title: error_title, details: "There are fewer data points in `y` array than `ddof`.".to_owned(), }),
    };
    let estimated_var: f64 = loss_function.loss_iter(y.iter(), y_prediction.iter())? / dof;
    let mean_of_x: f64 = x.mean().ok_or(DigiFiError::MeanCalculation { title: error_title, series: "x".to_owned(), })?;
    let sum_sq_dev: f64 = x.iter().map(|v| (v - mean_of_x).powi(2)).sum();
    Ok((estimated_var / sum_sq_dev).sqrt())
}
/// Computes the coefficient of determination: `R² = 1 - SS_res / SS_tot`.
///
/// # Errors
/// - `real_values` and `predicted_values` have different lengths.
/// - The mean of `real_values` cannot be computed.
pub fn r_squared(real_values: &Array1<f64>, predicted_values: &Array1<f64>) -> Result<f64, DigiFiError> {
    compare_len(&real_values.iter(), &predicted_values.iter(), "real_values", "predicted_values")?;
    let real_values_mean: f64 = real_values.mean()
        .ok_or(DigiFiError::MeanCalculation { title: "R Square".to_owned(), series: "real_values".to_owned(), })?;
    // Accumulate residual and total sums of squares in a single pass.
    let mut residual_sum: f64 = 0.0;
    let mut total_sum: f64 = 0.0;
    for (real, predicted) in real_values.iter().zip(predicted_values.iter()) {
        residual_sum += (real - predicted).powi(2);
        total_sum += (real - real_values_mean).powi(2);
    }
    Ok(1.0 - residual_sum / total_sum)
}
pub fn adjusted_r_squared(real_values: &Array1<f64>, predicted_values: &Array1<f64>, sample_size: usize, k_variables: usize) -> Result<f64, DigiFiError> {
Ok(1.0 - (1.0-r_squared(real_values, predicted_values)?) * (sample_size as f64 - 1.0) / ((sample_size - k_variables) as f64 - 1.0))
}
/// Computes the variance inflation factor (VIF) of feature `xi` against the
/// remaining features `xis`: `VIF = 1 / (1 - R²)` where `R²` comes from
/// regressing `xi` on `xis` (with an added constant term).
///
/// Returns `Ok(None)` when `R²` is unavailable or exactly `1.0`
/// (which would make the VIF undefined).
///
/// # Errors
/// - Propagates any error from running the underlying linear regression.
pub fn variance_inflation_factor(xis: &mut FeatureCollection, xi: &Array1<f64>) -> Result<Option<f64>, DigiFiError> {
    // A constant term is required for the auxiliary regression.
    xis.add_constant = true;
    // Only the R² statistic is needed; disable every other computation.
    let mut settings: LinearRegressionSettings = LinearRegressionSettings::disable_all();
    settings.enable_r_squared = true;
    let analysis: LinearRegressionAnalysis = LinearRegressionAnalysis::new(settings);
    let result: LinearRegressionResult = analysis.run(xis, xi)?;
    if let Some(r_sq) = result.r_squared {
        if r_sq != 1.0 {
            return Ok(Some(1.0 / (1.0 - r_sq)));
        }
    }
    Ok(None)
}
/// Akaike information criterion: `AIC = 2k - 2 ln(L)`,
/// where `l` is the maximized likelihood and `k` the number of parameters.
pub fn akaike_information_criterion(l: f64, k: usize) -> f64 {
    2.0 * (k as f64) - 2.0 * l.ln()
}
/// Akaike information criterion from a log-likelihood: `AIC = 2k - 2 ll`,
/// where `ll` is the maximized log-likelihood and `k` the number of parameters.
pub fn akaike_information_criterion_log(ll: f64, k: usize) -> f64 {
    2.0 * (k as f64) - 2.0 * ll
}
/// Bayesian information criterion: `BIC = k ln(n) - 2 ln(L)`,
/// where `l` is the maximized likelihood, `k` the number of parameters,
/// and `n` the number of observations.
pub fn bayesian_information_criterion(l: f64, k: usize, n: usize) -> f64 {
    let log_n: f64 = (n as f64).ln();
    (k as f64) * log_n - 2.0 * l.ln()
}
/// Bayesian information criterion from a log-likelihood: `BIC = k ln(n) - 2 ll`,
/// where `ll` is the maximized log-likelihood, `k` the number of parameters,
/// and `n` the number of observations.
pub fn bayesian_information_criterion_log(ll: f64, k: usize, n: usize) -> f64 {
    let log_n: f64 = (n as f64).ln();
    (k as f64) * log_n - 2.0 * ll
}
#[cfg(test)]
mod tests {
    use ndarray::{Array1, array};
    use crate::utilities::TEST_ACCURACY;

    /// Population covariance (`ddof = 0`) against hand-computed deviation products.
    #[test]
    fn unit_test_covariance() -> () {
        use crate::statistics::covariance;
        let array_1: Array1<f64> = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let array_2: Array1<f64> = array![1.0, 5.0, -9.0, 2.0, 4.0];
        assert!((covariance(&array_1, &array_2, 0).unwrap() - array![-0.8, -4.4, 0.0, 1.4, 6.8].sum()/5.0).abs() < TEST_ACCURACY);
    }

    /// Skewness as the third standardized central moment.
    #[test]
    fn unit_test_skewness() -> () {
        use crate::statistics::skewness;
        let array_: Array1<f64> = array![1.0, 1.0, 3.0, 4.0, 5.0];
        // Deviations from the mean (2.8), cubed.
        let numerator: f64 = array!(-5.832, -5.832, 0.008, 1.728, 10.648).mean().unwrap();
        let denominator: f64 = array!(3.24, 3.24, 0.04, 1.44, 4.84_f64).mean().unwrap().powf(3.0/2.0);
        assert!((skewness(&array_).unwrap() - numerator/denominator).abs() < TEST_ACCURACY);
    }

    /// Kurtosis as the fourth standardized central moment (m4 / m2²).
    /// Fixed: the expected numerator previously used squared deviations, which
    /// mirrored a copy-paste bug in `kurtosis` rather than the definition.
    #[test]
    fn unit_test_kurtosis() -> () {
        use crate::statistics::kurtosis;
        let array_: Array1<f64> = array![1.0, 1.0, 3.0, 4.0, 5.0];
        // Deviations from the mean (2.8) raised to the fourth power.
        let numerator: f64 = array!(10.4976, 10.4976, 0.0016, 2.0736, 23.4256_f64).mean().unwrap();
        let denominator: f64 = array!(3.24, 3.24, 0.04, 1.44, 4.84_f64).mean().unwrap().powi(2);
        assert!((kurtosis(&array_).unwrap() - numerator/denominator).abs() < TEST_ACCURACY);
    }

    /// Perfectly correlated / anti-correlated series must yield +1 / -1.
    #[test]
    fn unit_test_pearson_correlation() -> () {
        use crate::statistics::pearson_correlation;
        let array_1: Array1<f64> = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
        let array_2: Array1<f64> = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
        assert!((pearson_correlation(&array_1, &array_2, 1).unwrap() - 1.0).abs() < TEST_ACCURACY);
        let array_1: Array1<f64> = Array1::from_vec(vec![1.0, -1.0]);
        let array_2: Array1<f64> = Array1::from_vec(vec![-1.0, 1.0]);
        assert!((pearson_correlation(&array_1, &array_2, 1).unwrap() + 1.0).abs() < TEST_ACCURACY);
    }

    /// C(5, 2) = 10.
    #[test]
    fn unit_test_n_choose_r() {
        use crate::statistics::n_choose_r;
        assert_eq!(10, n_choose_r(5, 2).unwrap());
    }

    /// OLS solution of a small exactly-determined system.
    #[test]
    fn unit_test_linear_regression() -> () {
        use ndarray::Array2;
        use crate::statistics::linear_regression;
        let y: Array1<f64> = array![1.0, 2.0, 3.0];
        let x: Array2<f64> = array![[1.0, 3.0, 1.0], [4.0, 4.0, 1.0], [6.0, 5.0, 1.0]];
        let params: Array1<f64> = linear_regression(&x, &y).unwrap();
        let results: Array1<f64> = Array1::from(vec![-2.49556592e-16, 1.0, -2.0]);
        // Fixed: this line previously read `¶ms` (mis-encoded `&params`), which does not compile.
        assert!((&params - &results).map(|v| v.abs() ).sum() < TEST_ACCURACY);
    }
}