use super::helpers::fit_ols;
use crate::distributions::{chi_squared_survival, fisher_snedecor_cdf};
use crate::error::{Error, Result};
use crate::linalg::Matrix;
use serde::Serialize;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BGTestType {
Chisq,
F,
}
#[derive(Debug, Clone, Serialize)]
pub struct BreuschGodfreyResult {
pub test_name: String,
pub order: usize,
pub test_type: String,
pub statistic: f64,
pub p_value: f64,
pub df: Vec<f64>,
#[serde(rename = "is_passed")]
pub passed: bool,
pub interpretation: String,
pub guidance: String,
}
pub fn breusch_godfrey_test(
y: &[f64],
x_vars: &[Vec<f64>],
order: usize,
test_type: BGTestType,
) -> Result<BreuschGodfreyResult> {
let n = y.len();
let k = x_vars.len();
let p = k + 1;
if n <= p + order {
return Err(Error::InsufficientData {
required: p + order + 1,
available: n,
});
}
super::helpers::validate_regression_data(y, x_vars)?;
if order == 0 {
let test_type_str = match test_type {
BGTestType::Chisq => "Chi-squared",
BGTestType::F => "F",
};
return Ok(BreuschGodfreyResult {
test_name: "Breusch-Godfrey LM test for serial correlation of order up to 0".to_string(),
order: 0,
test_type: test_type_str.to_string(),
statistic: 0.0,
p_value: 1.0,
df: vec![0.0],
passed: true,
interpretation: "Breusch-Godfrey test (order = 0): No lags tested, null hypothesis trivially holds.".to_string(),
guidance: "No action needed. Order 0 means no serial correlation was tested.".to_string(),
});
}
let mut x_data = vec![0.0; n * p];
for row in 0..n {
x_data[row * p] = 1.0; for (col, x_var) in x_vars.iter().enumerate() {
x_data[row * p + col + 1] = x_var[row];
}
}
let x = Matrix::new(n, p, x_data);
let beta = fit_ols(y, &x)?;
let predictions = x.mul_vec(&beta);
let residuals: Vec<f64> = y
.iter()
.zip(predictions.iter())
.map(|(&yi, &yi_hat)| yi - yi_hat)
.collect();
let mut lag_data = vec![0.0; n * order];
for row in 0..n {
for lag in 0..order {
if row > lag {
lag_data[row * order + lag] = residuals[row - lag - 1];
}
}
}
let p_aug = p + order;
let mut x_aug_data = vec![0.0; n * p_aug];
for row in 0..n {
for col in 0..p {
x_aug_data[row * p_aug + col] = x.data[row * p + col];
}
for lag in 0..order {
x_aug_data[row * p_aug + p + lag] = lag_data[row * order + lag];
}
}
let x_aug = Matrix::new(n, p_aug, x_aug_data);
let beta_aug = fit_ols(&residuals, &x_aug)?;
let fitted_aug = x_aug.mul_vec(&beta_aug);
let ss_res: f64 = residuals
.iter()
.zip(fitted_aug.iter())
.map(|(&r, &f)| (r - f) * (r - f))
.sum();
let mean_res = residuals.iter().sum::<f64>() / n as f64;
let ss_tot: f64 = residuals
.iter()
.map(|&r| (r - mean_res) * (r - mean_res))
.sum();
let r_squared = 1.0 - ss_res / ss_tot;
let (statistic, df, p_value) = match test_type {
BGTestType::Chisq => {
let lm = n as f64 * r_squared;
let df_val = order as f64;
let p_val = chi_squared_survival(lm, df_val);
(lm, vec![df_val], p_val)
}
BGTestType::F => {
let beta_restricted = fit_ols(&residuals, &x)?;
let fitted_restricted = x.mul_vec(&beta_restricted);
let rss0: f64 = residuals
.iter()
.zip(fitted_restricted.iter())
.map(|(&r, &f)| (r - f) * (r - f))
.sum();
let rss1 = ss_res;
let df1 = order as f64;
let df2 = (n - p - order) as f64;
if df2 < 1.0 {
return Err(Error::InsufficientData {
required: p + order + 2,
available: n,
});
}
let f_stat = ((rss0 - rss1) / df1) / (rss1 / df2);
let p_val = 1.0 - fisher_snedecor_cdf(f_stat, df1, df2);
(f_stat, vec![df1, df2], p_val)
}
};
let alpha = 0.05;
let passed = p_value > alpha;
let test_type_str = match test_type {
BGTestType::Chisq => "Chi-squared",
BGTestType::F => "F",
};
let interpretation = if passed {
format!(
"Breusch-Godfrey {} test (order = {}): statistic = {:.4}, p-value = {:.4}. \
No significant serial correlation detected up to order {}.",
test_type_str, order, statistic, p_value, order
)
} else {
format!(
"Breusch-Godfrey {} test (order = {}): statistic = {:.4}, p-value = {:.4}. \
Significant serial correlation detected at order <= {}.",
test_type_str, order, statistic, p_value, order
)
};
let guidance = if passed {
"No action needed. The residuals show no significant serial correlation up to the specified order.".to_string()
} else {
"Consider: (1) Adding lagged dependent variables, (2) Using autoregressive error models (e.g., Cochrane-Orcutt), (3) Checking for omitted variables, (4) Using HAC standard errors.".to_string()
};
Ok(BreuschGodfreyResult {
test_name: format!("Breusch-Godfrey LM test for serial correlation of order up to {}", order),
order,
test_type: test_type_str.to_string(),
statistic,
p_value,
df,
passed,
interpretation,
guidance,
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_breusch_godfrey_simple() {
let y = vec![2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let result = breusch_godfrey_test(&y, &[x1], 1, BGTestType::Chisq).unwrap();
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert_eq!(result.order, 1);
assert_eq!(result.test_type, "Chi-squared");
assert_eq!(result.df.len(), 1);
assert_eq!(result.df[0], 1.0);
}
#[test]
fn test_breusch_godfrey_order_4() {
let y = vec![
2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1, 17.9, 20.2, 21.8, 24.1,
];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
let result = breusch_godfrey_test(&y, &[x1], 4, BGTestType::Chisq).unwrap();
assert_eq!(result.order, 4);
assert_eq!(result.df[0], 4.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}
#[test]
fn test_breusch_godfrey_f_statistic() {
let y = vec![
2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1, 17.9, 20.2, 21.8, 24.1,
];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
let result = breusch_godfrey_test(&y, &[x1], 2, BGTestType::F).unwrap();
assert_eq!(result.test_type, "F");
assert_eq!(result.df.len(), 2);
assert_eq!(result.df[0], 2.0); assert!(result.df[1] > 0.0); assert!(result.statistic >= 0.0);
}
#[test]
fn test_insufficient_data() {
let y = vec![1.0, 2.0];
let x1 = vec![1.0, 2.0];
let result = breusch_godfrey_test(&y, &[x1], 1, BGTestType::Chisq);
assert!(matches!(result, Err(Error::InsufficientData { .. })));
}
#[test]
fn test_breusch_godfrey_multiple_predictors() {
let y = vec![
2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1, 17.9, 20.2, 21.8, 24.1,
];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
let x2 = vec![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0];
let result = breusch_godfrey_test(&y, &[x1, x2], 1, BGTestType::Chisq).unwrap();
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert!(!result.interpretation.is_empty());
assert!(!result.guidance.is_empty());
}
#[test]
fn test_interpretation_content() {
let y = vec![
2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1, 17.9, 20.2, 21.8, 24.1,
];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
let result = breusch_godfrey_test(&y, &[x1], 1, BGTestType::Chisq).unwrap();
assert!(result.interpretation.contains("Breusch-Godfrey"));
assert!(result.interpretation.contains("order"));
assert!(result.interpretation.contains("statistic"));
assert!(result.interpretation.contains("p-value"));
assert!(!result.guidance.is_empty());
}
#[test]
fn test_chisq_vs_f_similarity() {
let mut y = Vec::new();
let mut x1 = Vec::new();
for i in 1..=50 {
x1.push(i as f64);
y.push(2.0 * i as f64 + 5.0 + (i as f64 * 0.1)); }
let result_chisq =
breusch_godfrey_test(&y, &[x1.clone()], 1, BGTestType::Chisq).unwrap();
let result_f = breusch_godfrey_test(&y, &[x1], 1, BGTestType::F).unwrap();
assert!(result_chisq.p_value >= 0.0 && result_chisq.p_value <= 1.0);
assert!(result_f.p_value >= 0.0 && result_f.p_value <= 1.0);
}
#[test]
fn test_detect_autocorrelation() {
let mut y = vec![10.0];
let mut x1 = vec![1.0];
for i in 1..=30 {
x1.push(i as f64);
let new_val = y[i - 1] + 2.0 + (i as f64) * 0.5;
y.push(new_val);
}
let result = breusch_godfrey_test(&y, &[x1], 1, BGTestType::Chisq).unwrap();
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}
#[test]
fn test_order_zero() {
let y = vec![2.1, 4.2, 5.8, 8.1, 10.1, 12.2, 13.8, 16.1];
let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let result = breusch_godfrey_test(&y, &[x1], 0, BGTestType::Chisq).unwrap();
assert_eq!(result.order, 0);
assert_eq!(result.df[0], 0.0);
assert_eq!(result.p_value, 1.0);
}
#[test]
fn test_synthetic_autocorrelated() {
let y = vec![
3.15, 5.49, 7.48, 9.92, 12.09, 14.62, 16.88, 19.40, 21.66, 24.38, 26.50, 29.29,
31.40, 34.07, 36.54, 39.21, 41.51, 44.18, 46.55, 49.02,
];
let x1 = vec![
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
];
let result = breusch_godfrey_test(&y, &[x1], 1, BGTestType::Chisq).unwrap();
assert!(result.p_value < 0.2); assert!(result.statistic > 0.0);
}
}