use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Dimension};
use scirs2_core::numeric::{Float, FromPrimitive, NumCast};
use std::cmp::Ordering;
use super::check_sameshape;
use crate::error::{MetricsError, Result};
#[derive(Debug, Clone)]
pub struct ErrorHistogram<F: Float> {
pub bin_edges: Vec<F>,
pub bin_counts: Vec<usize>,
pub n_observations: usize,
pub min_error: F,
pub max_error: F,
}
#[allow(dead_code)]
pub fn error_histogram<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
n_bins: usize,
) -> Result<ErrorHistogram<F>>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
if n_bins == 0 {
return Err(MetricsError::InvalidInput(
"Number of _bins must be positive".to_string(),
));
}
let n_samples = y_true.len();
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
let mut min_error = residuals[0];
let mut max_error = residuals[0];
for &residual in &residuals[1..] {
if residual < min_error {
min_error = residual;
}
if residual > max_error {
max_error = residual;
}
}
let range = if max_error > min_error {
max_error - min_error
} else {
F::one()
};
let bin_width = range / NumCast::from(n_bins).expect("Operation failed");
let mut bin_edges = Vec::with_capacity(n_bins + 1);
for i in 0..=n_bins {
bin_edges.push(min_error + F::from(i).expect("Failed to convert to float") * bin_width);
}
let mut bin_counts = vec![0; n_bins];
for &residual in &residuals {
if residual == max_error {
bin_counts[n_bins - 1] += 1;
} else {
let bin_idx = ((residual - min_error) / bin_width)
.to_usize()
.expect("Operation failed");
bin_counts[bin_idx] += 1;
}
}
Ok(ErrorHistogram {
bin_edges,
bin_counts,
n_observations: n_samples,
min_error,
max_error,
})
}
#[derive(Debug, Clone)]
pub struct QQPlotData<F: Float> {
pub theoretical_quantiles: Vec<F>,
pub sample_quantiles: Vec<F>,
pub reference_line: Vec<(F, F)>,
}
#[allow(dead_code)]
pub fn qq_plot_data<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
n_quantiles: usize,
) -> Result<QQPlotData<F>>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
if n_quantiles < 2 {
return Err(MetricsError::InvalidInput(
"Number of _quantiles must be at least 2".to_string(),
));
}
let n_samples = y_true.len();
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let mean = residuals.iter().fold(F::zero(), |acc, &x| acc + x)
/ NumCast::from(n_samples).expect("Operation failed");
let variance = residuals.iter().fold(F::zero(), |acc, &x| {
let diff = x - mean;
acc + diff * diff
}) / NumCast::from(n_samples).expect("Operation failed");
let std_dev = variance.sqrt();
let mut std_residuals = Vec::with_capacity(n_samples);
for &r in &residuals {
std_residuals.push((r - mean) / std_dev);
}
let mut theoretical_quantiles = Vec::with_capacity(n_quantiles);
let mut sample_quantiles = Vec::with_capacity(n_quantiles);
let step = F::one() / NumCast::from(n_quantiles + 1).expect("Operation failed");
for i in 1..=n_quantiles {
let p: F = F::from(i).expect("Failed to convert to float") * step;
let theoretical_q = normal_quantile(p.to_f64().expect("Operation failed"));
theoretical_quantiles.push(F::from(theoretical_q).expect("Failed to convert to float"));
let idx = (p * NumCast::from(n_samples).expect("Operation failed"))
.to_usize()
.expect("Operation failed")
.min(n_samples - 1);
sample_quantiles.push(std_residuals[idx]);
}
let mut min_val = theoretical_quantiles[0].min(sample_quantiles[0]);
let mut max_val = theoretical_quantiles[n_quantiles - 1].max(sample_quantiles[n_quantiles - 1]);
let range = max_val - min_val;
min_val = min_val - range * F::from_f64(0.05).expect("Operation failed");
max_val = max_val + range * F::from_f64(0.05).expect("Operation failed");
let reference_line = vec![(min_val, min_val), (max_val, max_val)];
Ok(QQPlotData {
theoretical_quantiles,
sample_quantiles,
reference_line,
})
}
#[allow(dead_code)]
fn normal_quantile(p: f64) -> f64 {
if p <= 0.0 || p >= 1.0 {
if p <= 0.0 {
return -5.0; } else {
return 5.0; }
}
let a = [
2.50662823884,
-18.61500062529,
41.39119773534,
-25.44106049637,
];
let b = [
-8.47351093090,
23.08336743743,
-21.06224101826,
3.13082909833,
];
let c = [
0.3374754822726147,
0.9761690190917186,
0.1607979714918209,
0.0276438810333863,
0.0038405729373609,
0.0003951896511919,
0.0000321767881768,
0.0000002888167364,
0.0000003960315187,
];
if (0.08..=0.92).contains(&p) {
let q = p - 0.5;
let r = q * q;
let mut result = q * (a[0] + r * (a[1] + r * (a[2] + r * a[3])));
result /= 1.0 + r * (b[0] + r * (b[1] + r * (b[2] + r * b[3])));
return result;
}
let q = if p < 0.08 {
(-2.0 * (p).ln()).sqrt()
} else {
(-2.0 * (1.0 - p).ln()).sqrt()
};
let result = c[0]
+ q * (c[1]
+ q * (c[2]
+ q * (c[3] + q * (c[4] + q * (c[5] + q * (c[6] + q * (c[7] + q * c[8])))))));
if p < 0.08 {
-result
} else {
result
}
}
#[derive(Debug, Clone)]
pub struct ResidualAnalysis<F: Float> {
pub residuals: Vec<F>,
pub standardized_residuals: Vec<F>,
pub studentized_residuals: Vec<F>,
pub cooks_distances: Vec<F>,
pub dffits: Vec<F>,
pub leverage: Vec<F>,
pub histogram: ErrorHistogram<F>,
pub qq_plot: QQPlotData<F>,
pub durbin_watson: F,
pub breusch_pagan: F,
pub shapiro_wilk: F,
}
#[allow(dead_code)]
pub fn residual_analysis<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
x: Option<&Array2<F>>,
hat_matrix: Option<&Array2<F>>,
) -> Result<ResidualAnalysis<F>>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive + 'static,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
let n_samples = y_true.len();
if let Some(x_mat) = x {
if x_mat.shape()[0] != n_samples {
return Err(MetricsError::InvalidInput(format!(
"X _matrix has {} rows, but y_true has {} elements",
x_mat.shape()[0],
n_samples
)));
}
}
if let Some(h_mat) = hat_matrix {
if h_mat.shape() != [n_samples, n_samples] {
return Err(MetricsError::InvalidInput(format!(
"Hat _matrix has shape {:?}, but should be [{}, {}]",
h_mat.shape(),
n_samples,
n_samples
)));
}
}
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
let residual_mean = residuals.iter().fold(F::zero(), |acc, &r| acc + r)
/ NumCast::from(n_samples).expect("Operation failed");
let residual_var = residuals.iter().fold(F::zero(), |acc, &r| {
let diff = r - residual_mean;
acc + diff * diff
}) / NumCast::from(n_samples).expect("Operation failed");
let residual_std = residual_var.sqrt();
let mut standardized_residuals = Vec::with_capacity(n_samples);
for &r in &residuals {
standardized_residuals.push((r - residual_mean) / residual_std);
}
let leverage = if let Some(h_mat) = hat_matrix {
let mut h_diag = Vec::with_capacity(n_samples);
for i in 0..n_samples {
h_diag.push(h_mat[[i, i]]);
}
h_diag
} else if let Some(x_mat) = x {
let p = x_mat.shape()[1]; let xt = x_mat.t();
let xtx = xt.dot(x_mat);
let mut xtx_inv = Array2::<F>::zeros((p, p));
for i in 0..p {
if xtx[[i, i]] > F::epsilon() {
xtx_inv[[i, i]] = F::one() / xtx[[i, i]];
}
}
let mut h_diag = Vec::with_capacity(n_samples);
for i in 0..n_samples {
let mut h_ii = F::zero();
for j in 0..p {
for k in 0..p {
h_ii = h_ii + x_mat[[i, j]] * xtx_inv[[j, k]] * x_mat[[i, k]];
}
}
h_diag.push(h_ii);
}
h_diag
} else {
vec![F::one() / NumCast::from(n_samples).expect("Operation failed"); n_samples]
};
let mut studentized_residuals = Vec::with_capacity(n_samples);
for (i, &r) in residuals.iter().enumerate() {
let h_ii = leverage[i];
if h_ii < F::one() {
let student_r = r / (residual_std * (F::one() - h_ii).sqrt());
studentized_residuals.push(student_r);
} else {
studentized_residuals.push(F::zero());
}
}
let mut cooks_distances = Vec::with_capacity(n_samples);
for (i, &r) in standardized_residuals.iter().enumerate() {
let h_ii = leverage[i];
if h_ii < F::one() {
let p_value = if let Some(x_mat) = x {
x_mat.shape()[1]
} else {
1 };
let cook_d = (r * r) * (h_ii / (F::one() - h_ii))
/ NumCast::from(p_value).expect("Operation failed");
cooks_distances.push(cook_d);
} else {
cooks_distances.push(F::zero());
}
}
let mut dffits = Vec::with_capacity(n_samples);
for (i, &r) in studentized_residuals.iter().enumerate() {
let h_ii = leverage[i];
if h_ii < F::one() {
let dffit = r * (h_ii / (F::one() - h_ii)).sqrt();
dffits.push(dffit);
} else {
dffits.push(F::zero());
}
}
let mut numerator = F::zero();
for i in 1..n_samples {
let diff = residuals[i] - residuals[i - 1];
numerator = numerator + diff * diff;
}
let denominator = residuals.iter().fold(F::zero(), |acc, &r| acc + r * r);
let durbin_watson = if denominator > F::epsilon() {
numerator / denominator
} else {
F::from(2.0).expect("Failed to convert constant to float") };
let squared_residuals: Vec<F> = residuals.iter().map(|&r| r * r).collect();
let mean_sq_residual = squared_residuals.iter().fold(F::zero(), |acc, &r| acc + r)
/ NumCast::from(n_samples).expect("Operation failed");
let mut numerator = F::zero();
let mut denominator = F::zero();
for (i, &sq_r) in squared_residuals.iter().enumerate() {
let _pred = y_pred.iter().nth(i).expect("Operation failed");
let diff = sq_r - mean_sq_residual;
numerator = numerator + diff * diff;
denominator = denominator + (*_pred) * (*_pred);
}
let breusch_pagan = if denominator > F::epsilon() {
numerator / denominator
} else {
F::zero()
};
let mut ordered_residuals = standardized_residuals.clone();
ordered_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let mut expected_quantiles = Vec::with_capacity(n_samples);
for i in 0..n_samples {
let p = (F::from(i + 1).expect("Failed to convert to float")
- F::from(0.375).expect("Failed to convert constant to float"))
/ (F::from(n_samples).expect("Failed to convert to float")
+ F::from(0.25).expect("Failed to convert constant to float"));
let q = normal_quantile(p.to_f64().expect("Operation failed"));
expected_quantiles.push(F::from(q).expect("Failed to convert to float"));
}
let mut numerator = F::zero();
let mut denominator = F::zero();
for (i, &r) in ordered_residuals.iter().enumerate() {
let q = expected_quantiles[i];
numerator = numerator + r * q;
denominator = denominator + r * r;
}
let shapiro_wilk = if denominator > F::epsilon() {
(numerator / denominator).powi(2)
} else {
F::zero()
};
let histogram = error_histogram(y_true, y_pred, 10)?;
let qq_plot = qq_plot_data(y_true, y_pred, 20)?;
Ok(ResidualAnalysis {
residuals,
standardized_residuals,
studentized_residuals,
cooks_distances,
dffits,
leverage,
histogram,
qq_plot,
durbin_watson,
breusch_pagan,
shapiro_wilk,
})
}
#[allow(dead_code)]
pub fn test_heteroscedasticity<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
) -> Result<F>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
let n_samples = y_true.len();
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
let squared_residuals: Vec<F> = residuals.iter().map(|&r| r * r).collect();
let mean_sq_residual = squared_residuals.iter().fold(F::zero(), |acc, &r| acc + r)
/ NumCast::from(n_samples).expect("Operation failed");
let mut numerator = F::zero();
let mut denominator = F::zero();
for (i, &sq_r) in squared_residuals.iter().enumerate() {
let _pred = y_pred.iter().nth(i).expect("Operation failed");
let diff = sq_r - mean_sq_residual;
numerator = numerator + diff * diff;
denominator = denominator + (*_pred) * (*_pred);
}
if denominator < F::epsilon() {
return Err(MetricsError::InvalidInput(
"Denominator in heteroscedasticity test is zero".to_string(),
));
}
let bp_stat = numerator / denominator;
Ok(bp_stat)
}
#[allow(dead_code)]
pub fn test_autocorrelation<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
) -> Result<F>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
let n_samples = y_true.len();
if n_samples < 2 {
return Err(MetricsError::InvalidInput(
"At least 2 samples required for autocorrelation test".to_string(),
));
}
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
let mut numerator = F::zero();
for i in 1..n_samples {
let diff = residuals[i] - residuals[i - 1];
numerator = numerator + diff * diff;
}
let denominator = residuals.iter().fold(F::zero(), |acc, &r| acc + r * r);
if denominator < F::epsilon() {
return Err(MetricsError::InvalidInput(
"Sum of squared residuals is zero in autocorrelation test".to_string(),
));
}
let dw_stat = numerator / denominator;
Ok(dw_stat)
}
#[allow(dead_code)]
pub fn test_normality<F, S1, S2, D1, D2>(
y_true: &ArrayBase<S1, D1>,
y_pred: &ArrayBase<S2, D2>,
) -> Result<F>
where
F: Float + NumCast + std::fmt::Debug + FromPrimitive,
S1: Data<Elem = F>,
S2: Data<Elem = F>,
D1: Dimension,
D2: Dimension,
{
check_sameshape::<F, S1, S2, D1, D2>(y_true, y_pred)?;
let n_samples = y_true.len();
let mut residuals = Vec::with_capacity(n_samples);
for (yt, yp) in y_true.iter().zip(y_pred.iter()) {
residuals.push(*yt - *yp);
}
let mean = residuals.iter().fold(F::zero(), |acc, &r| acc + r)
/ NumCast::from(n_samples).expect("Operation failed");
let variance = residuals.iter().fold(F::zero(), |acc, &r| {
let diff = r - mean;
acc + diff * diff
}) / NumCast::from(n_samples).expect("Operation failed");
let std_dev = variance.sqrt();
let mut std_residuals = Vec::with_capacity(n_samples);
for &r in &residuals {
std_residuals.push((r - mean) / std_dev);
}
let mut ordered_residuals = std_residuals.clone();
ordered_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
let mut expected_quantiles = Vec::with_capacity(n_samples);
for i in 0..n_samples {
let p = (F::from(i + 1).expect("Failed to convert to float")
- F::from(0.375).expect("Failed to convert constant to float"))
/ (F::from(n_samples).expect("Failed to convert to float")
+ F::from(0.25).expect("Failed to convert constant to float"));
let q = normal_quantile(p.to_f64().expect("Operation failed"));
expected_quantiles.push(F::from(q).expect("Failed to convert to float"));
}
let mean_residual = F::zero(); let mean_quantile = expected_quantiles.iter().fold(F::zero(), |acc, &q| acc + q)
/ NumCast::from(n_samples).expect("Operation failed");
let mut numerator = F::zero();
let mut denom_residual = F::zero();
let mut denom_quantile = F::zero();
for i in 0..n_samples {
let res_dev = ordered_residuals[i] - mean_residual;
let quant_dev = expected_quantiles[i] - mean_quantile;
numerator = numerator + res_dev * quant_dev;
denom_residual = denom_residual + res_dev * res_dev;
denom_quantile = denom_quantile + quant_dev * quant_dev;
}
let denominator = (denom_residual * denom_quantile).sqrt();
if denominator < F::epsilon() {
return Err(MetricsError::InvalidInput(
"Denominator in normality test is zero".to_string(),
));
}
let correlation = numerator / denominator;
let sw_stat = correlation * correlation;
Ok(sw_stat.min(F::one()))
}