use std::fmt::Display;
use ndarray::{Array1, Array2};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
use crate::error::{DigiFiError, ErrorTitle};
use crate::utilities::{
LARGE_TEXT_BREAK, SMALL_TEXT_BREAK, FeatureCollection,
loss_functions::{LossFunction, SSE},
};
use crate::statistics::{self, stat_tests};
/// Regression statistics for a single feature, produced by `LinearRegressionAnalysis::run`.
///
/// Every statistic is wrapped in `Option`: a field is `Some` only when the corresponding
/// computation was enabled in `LinearRegressionSettings`, otherwise it stays at its
/// `Default` value (`None`).
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LinearRegressionFeatureResult {
/// Name of the feature this coefficient was fitted for.
pub coefficient_name: Option<String>,
/// Fitted regression coefficient for this feature.
pub coefficient: f64,
/// Standard error of the coefficient estimate.
pub standard_error: Option<f64>,
/// Covariance between the feature and the response `y`.
pub covariance: Option<f64>,
/// Pearson correlation between the feature and the response `y`.
pub pearson_correlation: Option<f64>,
/// Null-hypothesis value (beta_0) used by the t-test, when one was provided.
pub t_test_h0: Option<f64>,
/// Whether the t-test rejected the null hypothesis.
pub t_test_reject_h0: Option<bool>,
/// t-statistic of the coefficient.
pub t_test_t_score: Option<f64>,
/// Degrees of freedom used by the t-test.
pub t_test_dof: Option<f64>,
/// p-value of the t-test.
pub t_test_p_value: Option<f64>,
/// Confidence level the t-test was evaluated at.
pub t_test_cl: Option<f64>,
/// Whether the F-test (ANOVA) rejected the null hypothesis.
pub f_test_reject_h0: Option<bool>,
/// F-statistic.
pub f_test_f_score: Option<f64>,
/// Number of groups used in the F-test.
pub f_test_k: Option<f64>,
/// Total data size used in the F-test.
pub f_test_n: Option<f64>,
/// Overall mean across the F-test groups.
pub f_test_overall_mean: Option<f64>,
/// Explained variance from the F-test.
pub f_test_explained_variance: Option<f64>,
/// Unexplained variance from the F-test.
pub f_test_unexplained_variance: Option<f64>,
/// Numerator degrees of freedom (d_1) of the F-test.
pub f_test_dof_1: Option<f64>,
/// Denominator degrees of freedom (d_2) of the F-test.
pub f_test_dof_2: Option<f64>,
/// p-value of the F-test.
pub f_test_p_value: Option<f64>,
/// Confidence level the F-test was evaluated at.
pub f_test_cl: Option<f64>,
/// Whether the feature is cointegrated with the response `y`.
pub cointegrated: Option<bool>,
/// Confidence level used by the cointegration test.
pub cointegration_cl: Option<f64>,
/// Variance inflation factor of this feature against the remaining features.
pub vif: Option<f64>,
}
impl Display for LinearRegressionFeatureResult {
    /// Renders a tab-indented report of every statistic for this feature.
    ///
    /// Missing (`None`) numeric statistics are printed as `NaN`; a missing name or
    /// boolean test outcome is printed as an empty string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Renders an optional boolean as "true"/"false", or an empty string when absent.
        fn opt_bool(v: Option<bool>) -> String {
            v.map(|b| b.to_string()).unwrap_or_default()
        }
        // Fix: the original label was "Coefficient {name}" (no separator), which was
        // ambiguous next to the "Coefficient: {value}" line below.
        write!(f, "\tCoefficient Name: {}\n", self.coefficient_name.as_deref().unwrap_or(""))?;
        write!(f, "\tCoefficient: {}\n", self.coefficient)?;
        write!(f, "\tStandard Error: {}\n", self.standard_error.unwrap_or(f64::NAN))?;
        write!(f, "\tCovariance: {}\n", self.covariance.unwrap_or(f64::NAN))?;
        write!(f, "\tPearson Correlation: {}\n", self.pearson_correlation.unwrap_or(f64::NAN))?;
        write!(f, "\tVariance Inflation Factor: {}\n", self.vif.unwrap_or(f64::NAN))?;
        write!(f, "{}", SMALL_TEXT_BREAK)?;
        write!(f, "\tt-Test Degrees of Freedom: {}\n", self.t_test_dof.unwrap_or(f64::NAN))?;
        write!(f, "\tt-Test Null Hypothesis: {}\n", self.t_test_h0.unwrap_or(f64::NAN))?;
        write!(f, "\tt-Test Reject Null Hypothesis: {}\n", opt_bool(self.t_test_reject_h0))?;
        write!(f, "\tt-statistic: {}\n", self.t_test_t_score.unwrap_or(f64::NAN))?;
        write!(f, "\tp-value: {}\n", self.t_test_p_value.unwrap_or(f64::NAN))?;
        write!(f, "\tConfidence Level: {}\n", self.t_test_cl.unwrap_or(f64::NAN))?;
        write!(f, "{}", SMALL_TEXT_BREAK)?;
        write!(f, "\tF-Test Number of Groups: {}\n", self.f_test_k.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Total Data Size: {}\n", self.f_test_n.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Overall Mean: {}\n", self.f_test_overall_mean.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Explained Variance: {}\n", self.f_test_explained_variance.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Unexplained Variance: {}\n", self.f_test_unexplained_variance.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Numerator Degrees of Freedom (d_1): {}\n", self.f_test_dof_1.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Denominator Degrees of Freedom (d_2): {}\n", self.f_test_dof_2.unwrap_or(f64::NAN))?;
        write!(f, "\tF-Test Reject Null Hypothesis: {}\n", opt_bool(self.f_test_reject_h0))?;
        write!(f, "\tF-statistic: {}\n", self.f_test_f_score.unwrap_or(f64::NAN))?;
        write!(f, "\tp-value: {}\n", self.f_test_p_value.unwrap_or(f64::NAN))?;
        write!(f, "\tConfidence Level: {}\n", self.f_test_cl.unwrap_or(f64::NAN))?;
        write!(f, "{}", SMALL_TEXT_BREAK)?;
        write!(f, "\tCointegrated: {}\n", opt_bool(self.cointegrated))?;
        write!(f, "\tConfidence Level: {}\n", self.cointegration_cl.unwrap_or(f64::NAN))
    }
}
/// Aggregate result of `LinearRegressionAnalysis::run`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LinearRegressionResult {
/// All fitted regression coefficients in feature order; when a constant column was
/// added, the intercept is the last entry.
pub all_coefficients: Array1<f64>,
/// Intercept term; `Some` only when the feature collection added a constant column.
pub intercept: Option<f64>,
/// Per-feature statistics, one entry per feature in the collection.
pub coefficients: Vec<LinearRegressionFeatureResult>,
/// Delta degrees of freedom used in the analysis.
pub ddof: usize,
/// Sum of squared estimate of errors (`None` when disabled in the settings).
pub sse: Option<f64>,
/// Coefficient of determination, R-squared (`None` when disabled).
pub r_squared: Option<f64>,
/// Adjusted R-squared (`None` when disabled).
pub adjusted_r_squared: Option<f64>,
/// Maximised likelihood (`None` when disabled).
pub max_likelihood: Option<f64>,
/// Maximised log-likelihood (`None` when disabled).
pub max_log_likelihood: Option<f64>,
}
impl Display for LinearRegressionResult {
    /// Formats the full analysis report: header, intercept, one section per
    /// coefficient, and the goodness-of-fit statistics, delimited by text breaks.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Join the per-coefficient reports with a large break between sections.
        let coefficient_sections: String = self.coefficients.iter()
            .map(|c| c.to_string())
            .collect::<Vec<String>>()
            .join(LARGE_TEXT_BREAK);
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "Linear Regression Analysis Result\n")?;
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "\tIntercept: {}\n", self.intercept.unwrap_or(f64::NAN))?;
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "{}", coefficient_sections)?;
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "Delta Degrees of Freedom: {}\n", self.ddof)?;
        write!(f, "Sum of Squared Estimate of Errors: {}\n", self.sse.unwrap_or(f64::NAN))?;
        write!(f, "Coefficient of Determination (R-Squared): {}\n", self.r_squared.unwrap_or(f64::NAN))?;
        write!(f, "Adjusted Coefficient of Determination (Adjusted R-Squared): {}\n", self.adjusted_r_squared.unwrap_or(f64::NAN))?;
        write!(f, "Maximised Likelihood: {}\n", self.max_likelihood.unwrap_or(f64::NAN))?;
        write!(f, "Maximised Log Likelihood: {}\n", self.max_log_likelihood.unwrap_or(f64::NAN))?;
        write!(f, "{}", LARGE_TEXT_BREAK)
    }
}
/// Configuration for `LinearRegressionAnalysis`: toggles each optional statistic
/// and carries the parameters of the enabled statistical tests.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LinearRegressionSettings {
/// Compute the sum of squared estimate of errors.
pub enable_sse: bool,
/// Compute the coefficient of determination (R-squared).
pub enable_r_squared: bool,
/// Compute the adjusted R-squared.
pub enable_adjusted_r_squared: bool,
/// Compute the maximised (log-)likelihood.
pub enable_max_likelihood: bool,
/// Compute the standard error of each coefficient.
pub enable_se: bool,
/// Compute the covariance of each feature with the response.
pub enable_cov: bool,
/// Compute the Pearson correlation of each feature with the response.
pub enable_pearson_corr: bool,
/// Run a t-test on each coefficient.
pub enable_t_test: bool,
/// Confidence level for the t-test.
pub t_test_cl: Option<stat_tests::ConfidenceLevel>,
/// Optional null-hypothesis values for the t-test, one per feature.
pub t_test_h0s: Option<Vec<f64>>,
/// Run an F-test (ANOVA) for each feature.
pub enable_f_test: bool,
/// Confidence level for the F-test.
pub f_test_cl: Option<stat_tests::ConfidenceLevel>,
/// Run a cointegration test between each feature and the response.
pub enable_cointegration: bool,
/// Confidence level for the cointegration test.
pub cointegration_cl: Option<stat_tests::ConfidenceLevel>,
/// Compute the variance inflation factor for each feature.
pub enable_vif: bool,
}
impl LinearRegressionSettings {

    /// Creates settings with every optional computation enabled, using the default
    /// confidence level for all statistical tests and no custom t-test null hypotheses.
    pub fn enable_all() -> Self {
        Self {
            enable_sse: true,
            enable_r_squared: true,
            enable_adjusted_r_squared: true,
            enable_max_likelihood: true,
            enable_se: true,
            enable_cov: true,
            enable_pearson_corr: true,
            enable_t_test: true,
            t_test_cl: Some(stat_tests::ConfidenceLevel::default()),
            t_test_h0s: None,
            enable_f_test: true,
            f_test_cl: Some(stat_tests::ConfidenceLevel::default()),
            enable_cointegration: true,
            cointegration_cl: Some(stat_tests::ConfidenceLevel::default()),
            enable_vif: true,
        }
    }

    /// Creates settings with every optional computation disabled.
    ///
    /// Delegates to the derived `Default`, which already yields `false` for every
    /// flag and `None` for every optional parameter — identical to the previous
    /// hand-written struct literal.
    pub fn disable_all() -> Self {
        Self::default()
    }
}
impl Display for LinearRegressionSettings {
    /// Renders the settings as a human-readable report delimited by text breaks.
    /// Unset confidence levels print their default; unset null hypotheses print
    /// as an empty list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Comma-separated list of the custom t-test null hypotheses (empty when unset).
        let t_test_h0s: String = match &self.t_test_h0s {
            Some(values) => values.iter().map(|h| h.to_string()).collect::<Vec<String>>().join(", "),
            None => String::new(),
        };
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "Linear Regression Settings\n")?;
        write!(f, "{}", LARGE_TEXT_BREAK)?;
        write!(f, "Enable SSE: {}\n", self.enable_sse)?;
        write!(f, "Enable Coefficient of Determination: {}\n", self.enable_r_squared)?;
        write!(f, "Enable Adjusted Coefficient of Determination: {}\n", self.enable_adjusted_r_squared)?;
        write!(f, "Enable Maximum Likelihood: {}\n", self.enable_max_likelihood)?;
        write!(f, "Enable SE: {}\n", self.enable_se)?;
        write!(f, "Enable Covariance: {}\n", self.enable_cov)?;
        write!(f, "Enable Pearson Correlation: {}\n", self.enable_pearson_corr)?;
        write!(f, "Enable t-Test: {}\n", self.enable_t_test)?;
        write!(f, "t-Test Confidence Level: {}\n", self.t_test_cl.unwrap_or_default())?;
        write!(f, "t-Test Null Hypotheses: {}\n", t_test_h0s)?;
        write!(f, "Enable F-Test: {}\n", self.enable_f_test)?;
        write!(f, "F-Test Confidence Level: {}\n", self.f_test_cl.unwrap_or_default())?;
        write!(f, "Enable Cointegration: {}\n", self.enable_cointegration)?;
        write!(f, "Cointegration Confidence Level: {}\n", self.cointegration_cl.unwrap_or_default())?;
        write!(f, "Enable Variance Inflation Factor: {}\n", self.enable_vif)?;
        write!(f, "{}", LARGE_TEXT_BREAK)
    }
}
/// Runner for a linear regression analysis over a `FeatureCollection` and a
/// response array, configured by `LinearRegressionSettings`.
#[derive(Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LinearRegressionAnalysis {
/// Toggles and test parameters controlling which statistics are computed.
pub settings: LinearRegressionSettings,
}
impl LinearRegressionAnalysis {

    /// Creates a new analysis with the given settings.
    pub fn new(settings: LinearRegressionSettings) -> Self {
        Self { settings }
    }

    /// Validates that the feature collection and response array are consistent.
    ///
    /// # Errors
    /// - `y` is empty.
    /// - The feature collection is empty.
    /// - A feature's length differs from `y`'s length.
    /// - `y` has fewer data points than the delta degrees of freedom.
    /// - `t_test_h0s` (when provided) does not have one entry per feature.
    fn validate_input(&self, fc: &FeatureCollection, y: &Array1<f64>) -> Result<(), DigiFiError> {
        if y.is_empty() {
            return Err(DigiFiError::ValidationError { title: Self::error_title(), details: "Array `y` must not be empty.".to_owned(), });
        }
        let y_len: usize = y.len();
        match fc.feature_size() {
            // Every feature must align element-wise with the response.
            Some(feature_len) if feature_len != y_len => {
                return Err(DigiFiError::UnmatchingLength { array_1: "feature".to_owned(), array_2: "y".to_owned(), });
            },
            Some(_) => (),
            None => {
                return Err(DigiFiError::ValidationError { title: Self::error_title(), details: "The feature collection is empty.".to_owned(), });
            },
        }
        if y_len < fc.ddof() {
            return Err(DigiFiError::Other { title: Self::error_title(), details: "There are fewer data points in `y` array than `ddof`.".to_owned() });
        }
        if self.settings.enable_t_test {
            // Custom null hypotheses, when provided, must map one-to-one onto features.
            if let Some(h0s) = &self.settings.t_test_h0s {
                if h0s.len() != fc.len() {
                    return Err(DigiFiError::UnmatchingLength { array_1: "feature collection".to_owned(), array_2: "t_test_h0s".to_owned(), });
                }
            }
        }
        Ok(())
    }

    /// Runs the linear regression and every statistic enabled in the settings.
    ///
    /// Fits the coefficients via `statistics::linear_regression`, then computes the
    /// enabled per-feature statistics (SE, covariance, Pearson correlation, t-test,
    /// F-test, cointegration, VIF) and the enabled global goodness-of-fit measures
    /// (SSE, R-squared, adjusted R-squared, maximised (log-)likelihood).
    ///
    /// # Errors
    /// Propagates validation errors (see `validate_input`) and any error from the
    /// underlying statistical routines.
    pub fn run(&self, fc: &FeatureCollection, y: &Array1<f64>) -> Result<LinearRegressionResult, DigiFiError> {
        self.validate_input(fc, y)?;
        let y_len: usize = y.len();
        let fc_len: usize = fc.len();
        let x_matrix: Array2<f64> = fc.get_matrix()?;
        let all_coefficients: Array1<f64> = statistics::linear_regression(&x_matrix, y)?;
        let y_prediction: Array1<f64> = x_matrix.dot(&all_coefficients);
        let ddof: usize = fc.ddof();
        let mut coefficients: Vec<LinearRegressionFeatureResult> = Vec::with_capacity(fc_len);
        for i in 0..fc_len {
            let x: &Array1<f64> = &fc.features[i];
            let mut coefficient: LinearRegressionFeatureResult = LinearRegressionFeatureResult::default();
            coefficient.coefficient_name = Some(fc.feature_names[i].clone());
            coefficient.coefficient = all_coefficients[i];
            if self.settings.enable_se { coefficient.standard_error = Some(statistics::se_lr_coefficient(y, &y_prediction, x, ddof)?); }
            if self.settings.enable_cov { coefficient.covariance = Some(statistics::covariance(y, x, 0)?); }
            if self.settings.enable_pearson_corr { coefficient.pearson_correlation = Some(statistics::pearson_correlation(y, x, 1)?); }
            if self.settings.enable_t_test {
                // Per-feature null hypothesis, when the user supplied a list
                // (validated above to have one entry per feature).
                let beta_0: Option<f64> = self.settings.t_test_h0s.as_ref().map(|v| v[i]);
                let t_test_result: stat_tests::TTestResult = stat_tests::t_test_lr(coefficient.coefficient, beta_0, y, &y_prediction, x, ddof, self.settings.t_test_cl)?;
                coefficient.t_test_h0 = beta_0;
                coefficient.t_test_reject_h0 = Some(t_test_result.reject_h0);
                coefficient.t_test_t_score = Some(t_test_result.t_score);
                coefficient.t_test_dof = Some(t_test_result.dof);
                coefficient.t_test_p_value = Some(t_test_result.p_value);
                coefficient.t_test_cl = Some(t_test_result.p_cl);
            }
            if self.settings.enable_f_test {
                let f_test_result: stat_tests::FTestResult = stat_tests::f_test_anova(vec![y, x], self.settings.f_test_cl)?;
                coefficient.f_test_reject_h0 = Some(f_test_result.reject_h0);
                coefficient.f_test_f_score = Some(f_test_result.f_score);
                coefficient.f_test_k = Some(f_test_result.k);
                coefficient.f_test_n = Some(f_test_result.n);
                coefficient.f_test_overall_mean = Some(f_test_result.overall_mean);
                coefficient.f_test_explained_variance = Some(f_test_result.explained_variance);
                coefficient.f_test_unexplained_variance = Some(f_test_result.unexplained_variance);
                coefficient.f_test_dof_1 = Some(f_test_result.dof_1);
                coefficient.f_test_dof_2 = Some(f_test_result.dof_2);
                coefficient.f_test_p_value = Some(f_test_result.p_value);
                coefficient.f_test_cl = Some(f_test_result.p_cl);
            }
            if self.settings.enable_cointegration {
                let result: stat_tests::CointegrationResult = stat_tests::cointegration(x, y, self.settings.cointegration_cl)?;
                coefficient.cointegrated = Some(result.cointegrated);
                // `cointegration_cl` is `Copy` (it is passed by value above), so no clone is needed.
                coefficient.cointegration_cl = self.settings.cointegration_cl.map(|v| v.get_p());
            }
            if self.settings.enable_vif {
                // VIF regresses feature `i` against all the other features; propagate
                // `add_feature` errors instead of panicking (the original used `unwrap`).
                let mut xis: FeatureCollection = FeatureCollection::new();
                for (index, (array, label)) in fc.features.iter().zip(fc.feature_names.iter()).enumerate() {
                    if index != i {
                        xis.add_feature(array.iter(), label)?;
                    }
                }
                coefficient.vif = statistics::variance_inflation_factor(&mut xis, x)?;
            }
            coefficients.push(coefficient);
        }
        // When a constant column was added, its coefficient (the intercept) is the last entry.
        let intercept: Option<f64> = if fc.add_constant { all_coefficients.last().copied() } else { None };
        let sse: Option<f64> = if self.settings.enable_sse { Some(SSE.loss_iter(y.iter(), y_prediction.iter())?) } else { None };
        let r_squared: Option<f64> = if self.settings.enable_r_squared { Some(statistics::r_squared(y, &y_prediction)?) } else { None };
        let adjusted_r_squared: Option<f64> = if self.settings.enable_adjusted_r_squared { Some(statistics::adjusted_r_squared(y, &y_prediction, y_len, fc_len)?) } else { None };
        let max_log_likelihood: Option<f64> = if self.settings.enable_max_likelihood {
            // Reuse the SSE when it was already computed above.
            let sse: f64 = match sse { Some(v) => v, None => SSE.loss_iter(y.iter(), y_prediction.iter())?, };
            // Gaussian log-likelihood: n * ln(1 / sqrt(2*pi*sigma^2)) - SSE / (2*sigma^2),
            // with sigma^2 estimated from the residuals (ddof = 0).
            let two_sigma_square: f64 = 2.0 * (y - &y_prediction).std(0.0).powi(2);
            Some((y_len as f64) * (1.0 / (two_sigma_square * std::f64::consts::PI).sqrt()).ln() - sse / (two_sigma_square))
        } else {
            None
        };
        let max_likelihood: Option<f64> = max_log_likelihood.map(f64::exp);
        Ok(LinearRegressionResult {
            all_coefficients,
            intercept, coefficients,
            ddof, sse, r_squared, adjusted_r_squared, max_likelihood, max_log_likelihood,
        })
    }
}
impl ErrorTitle for LinearRegressionAnalysis {
    /// Title used when constructing `DigiFiError`s raised by this analysis.
    fn error_title() -> String {
        "Linear Regression Analysis".to_owned()
    }
}
impl Display for LinearRegressionAnalysis {
    /// Delegates to the `Display` implementation of the underlying settings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Format the settings directly instead of allocating an intermediate String
        // via `to_string` (clippy: `to_string_in_format_args`). Output is unchanged.
        write!(f, "{}", self.settings)
    }
}
#[cfg(all(test, feature = "sample_data"))]
mod tests {
use ndarray::Array1;
use crate::utilities::{TEST_ACCURACY, FeatureCollection, sample_data::SampleData};
use crate::statistics::{LinearRegressionResult, LinearRegressionSettings, LinearRegressionAnalysis};
// Fits a five-factor (Fama-French-style) regression of stock risk premia on the
// bundled CAPM sample data and checks the intercept and all five coefficients.
#[test]
fn unit_test_linear_regression_analysis() -> () {
let sample: SampleData = SampleData::CAPM;
let (_, mut sample_data) = sample.load_sample_data();
// Market risk premium: market return minus the risk-free rate.
let market_premium: Array1<f64> = sample_data.remove("Market").unwrap() - sample_data.remove("RF").unwrap();
let mut fc: FeatureCollection = FeatureCollection::new();
// Include a constant column so the regression fits an intercept.
fc.add_constant = true;
fc.add_feature(market_premium.into_iter(), "Market Premium").unwrap();
fc.add_feature(sample_data.remove("SMB").unwrap().into_iter(), "SMB").unwrap();
fc.add_feature(sample_data.remove("HML").unwrap().into_iter(), "HML").unwrap();
fc.add_feature(sample_data.remove("RMW").unwrap().into_iter(), "RMW").unwrap();
fc.add_feature(sample_data.remove("CMA").unwrap().into_iter(), "CMA").unwrap();
let risk_premium: Array1<f64> = sample_data.remove("Stock Returns").unwrap();
// Run with every statistic enabled; only the fitted coefficients are asserted here.
let lra: LinearRegressionAnalysis = LinearRegressionAnalysis::new(LinearRegressionSettings::enable_all());
let lr_result: LinearRegressionResult = lra.run(&mut fc, &risk_premium).unwrap();
// Expected values — presumably produced by an external reference implementation; TODO confirm source.
assert!((lr_result.intercept.unwrap() - 0.01353015).abs() < TEST_ACCURACY);
assert!((lr_result.coefficients[0].coefficient - 1.37731033).abs() < TEST_ACCURACY);
assert!((lr_result.coefficients[1].coefficient - -0.38490771).abs() < TEST_ACCURACY);
assert!((lr_result.coefficients[2].coefficient - -0.58771487).abs() < TEST_ACCURACY);
assert!((lr_result.coefficients[3].coefficient - 0.11692186).abs() < TEST_ACCURACY);
assert!((lr_result.coefficients[4].coefficient - 0.4192746).abs() < TEST_ACCURACY);
}
}