1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
use ndarray::{Array1, Array2};
use ndarray_linalg::Inverse;
use statrs::distribution::{ChiSquared, ContinuousCDF, FisherSnedecor};
/// Specification tests for regression models
pub struct SpecificationTests;
impl SpecificationTests {
/// White's Test for Heteroskedasticity
///
/// Tests H₀: Homoskedasticity (constant variance) vs H₁: Heteroskedasticity
///
/// # Arguments
/// * `residuals` - Residuals from OLS regression
/// * `x` - Design matrix (n × k)
///
/// # Returns
/// Tuple of (LM_statistic, p_value, degrees_of_freedom)
///
/// # Interpretation
/// - If p < 0.05: Reject H₀, heteroskedasticity is present (use robust SE)
/// - If p > 0.05: Fail to reject H₀, homoskedasticity is plausible
///
/// # Example
/// ```no_run
/// use greeners::SpecificationTests;
///
/// let (lm_stat, p_value, df) = SpecificationTests::white_test(&residuals, &x)?;
/// if p_value < 0.05 {
/// println!("Heteroskedasticity detected! Use robust standard errors.");
/// }
/// ```
pub fn white_test(
residuals: &Array1<f64>,
x: &Array2<f64>,
) -> Result<(f64, f64, usize), String> {
let n = residuals.len();
let k = x.ncols();
// Square residuals (dependent variable for auxiliary regression)
let u_squared = residuals.mapv(|r| r.powi(2));
// Create auxiliary regressors: x and x² (simplified to avoid singularity)
// Exclude constant term (first column) to avoid perfect multicollinearity
let mut aux_regressors = Vec::new();
// Add constant term
aux_regressors.push(x.column(0).to_owned());
// Add non-constant regressors and their squares (skip first column = constant)
for j in 1..k {
aux_regressors.push(x.column(j).to_owned());
}
// Add squared terms for non-constant regressors
for j in 1..k {
let x_j = x.column(j);
aux_regressors.push(x_j.mapv(|v| v.powi(2)));
}
let p = aux_regressors.len(); // Total number of auxiliary regressors
// Build auxiliary design matrix
let mut x_aux = Array2::<f64>::zeros((n, p));
for (j, regressor) in aux_regressors.iter().enumerate() {
x_aux.column_mut(j).assign(regressor);
}
// Auxiliary regression: u² = X_aux * β + error
// Calculate R² from this auxiliary regression
let x_t = x_aux.t();
let xtx = x_t.dot(&x_aux);
let xtx_inv: Array2<f64> = match xtx.inv() {
Ok(inv) => inv,
Err(_) => return Err("Singular matrix in White test auxiliary regression".to_string()),
};
let xty = x_t.dot(&u_squared);
let beta_aux: Array1<f64> = xtx_inv.dot(&xty);
let fitted: Array1<f64> = x_aux.dot(&beta_aux);
// Calculate R² for auxiliary regression
let mean_u_sq = u_squared.mean().unwrap_or(0.0);
let tss = u_squared
.iter()
.map(|&y| (y - mean_u_sq).powi(2))
.sum::<f64>();
let rss = fitted
.iter()
.zip(u_squared.iter())
.map(|(&f, &y)| (y - f).powi(2))
.sum::<f64>();
let r_squared = 1.0 - rss / tss;
// White's LM statistic: n * R²
let lm_stat = (n as f64) * r_squared;
// Degrees of freedom = number of auxiliary regressors (excluding constant if present)
let df = p;
// Under H₀, LM ~ χ²(df)
let chi2_dist = ChiSquared::new(df as f64).map_err(|e| e.to_string())?;
let p_value = 1.0 - chi2_dist.cdf(lm_stat);
Ok((lm_stat, p_value, df))
}
/// Ramsey RESET Test for Functional Form Misspecification
///
/// Tests H₀: Model is correctly specified vs H₁: Functional form misspecification
///
/// # Arguments
/// * `y` - Dependent variable
/// * `x` - Design matrix (n × k)
/// * `fitted_values` - Fitted values from original regression
/// * `power` - Maximum power of fitted values to include (typically 2, 3, or 4)
///
/// # Returns
/// Tuple of (F_statistic, p_value, df_num, df_denom)
///
/// # Interpretation
/// - If p < 0.05: Reject H₀, functional form misspecification detected
/// - If p > 0.05: Fail to reject H₀, functional form appears adequate
///
/// # Example
/// ```no_run
/// use greeners::SpecificationTests;
///
/// let fitted = model.fitted_values(&x);
/// let (f_stat, p_value, _, _) = SpecificationTests::reset_test(&y, &x, &fitted, 3)?;
/// if p_value < 0.05 {
/// println!("Functional form misspecification detected!");
/// }
/// ```
pub fn reset_test(
y: &Array1<f64>,
x: &Array2<f64>,
fitted_values: &Array1<f64>,
power: usize,
) -> Result<(f64, f64, usize, usize), String> {
if power < 2 {
return Err("Power must be at least 2 for RESET test".to_string());
}
let n = y.len();
let _k = x.ncols();
// Original model SSR
let residuals_orig: Array1<f64> = y - fitted_values;
let ssr_orig = residuals_orig.dot(&residuals_orig);
// Augmented model: add ŷ², ŷ³, ..., ŷ^power
let mut x_augmented = x.to_owned();
for p in 2..=power {
let y_hat_p = fitted_values.mapv(|v| v.powi(p as i32));
let n_rows = x_augmented.nrows();
let n_cols = x_augmented.ncols();
let mut new_x = Array2::<f64>::zeros((n_rows, n_cols + 1));
new_x.slice_mut(ndarray::s![.., 0..n_cols]).assign(&x_augmented);
new_x.column_mut(n_cols).assign(&y_hat_p);
x_augmented = new_x;
}
// Estimate augmented model
let x_aug_t = x_augmented.t();
let xtx_aug = x_aug_t.dot(&x_augmented);
let xtx_aug_inv: Array2<f64> = match xtx_aug.inv() {
Ok(inv) => inv,
Err(_) => return Err("Singular matrix in RESET test".to_string()),
};
let xty_aug = x_aug_t.dot(y);
let beta_aug: Array1<f64> = xtx_aug_inv.dot(&xty_aug);
let fitted_aug: Array1<f64> = x_augmented.dot(&beta_aug);
let residuals_aug: Array1<f64> = y - &fitted_aug;
let ssr_aug = residuals_aug.dot(&residuals_aug);
// F-statistic
let q = power - 1; // Number of restrictions (added powers)
let df_num = q;
let df_denom = n - x_augmented.ncols();
if df_denom <= 0 {
return Err("Insufficient degrees of freedom for RESET test".to_string());
}
let f_stat = ((ssr_orig - ssr_aug) / df_num as f64) / (ssr_aug / df_denom as f64);
// P-value
let f_dist =
FisherSnedecor::new(df_num as f64, df_denom as f64).map_err(|e| e.to_string())?;
let p_value = 1.0 - f_dist.cdf(f_stat);
Ok((f_stat, p_value, df_num, df_denom))
}
/// Breusch-Godfrey Test for Autocorrelation
///
/// Tests H₀: No autocorrelation up to lag p vs H₁: Autocorrelation present
///
/// # Arguments
/// * `residuals` - Residuals from OLS regression
/// * `x` - Design matrix (n × k)
/// * `lags` - Number of lags to test (typically 1 for AR(1))
///
/// # Returns
/// Tuple of (LM_statistic, p_value, degrees_of_freedom)
///
/// # Interpretation
/// - If p < 0.05: Reject H₀, autocorrelation detected
/// - If p > 0.05: Fail to reject H₀, no evidence of autocorrelation
///
/// # Example
/// ```no_run
/// use greeners::SpecificationTests;
///
/// let (lm_stat, p_value, df) = SpecificationTests::breusch_godfrey_test(&residuals, &x, 1)?;
/// if p_value < 0.05 {
/// println!("Autocorrelation detected! Consider using Newey-West SE.");
/// }
/// ```
pub fn breusch_godfrey_test(
residuals: &Array1<f64>,
x: &Array2<f64>,
lags: usize,
) -> Result<(f64, f64, usize), String> {
let n = residuals.len();
let _k = x.ncols();
if lags >= n {
return Err("Number of lags must be less than sample size".to_string());
}
// Create lagged residuals matrix
let mut x_augmented = x.to_owned();
for lag in 1..=lags {
let mut lagged = Array1::<f64>::zeros(n);
for i in lag..n {
lagged[i] = residuals[i - lag];
}
// Append lagged residuals as new column
let n_rows = x_augmented.nrows();
let n_cols = x_augmented.ncols();
let mut new_x = Array2::<f64>::zeros((n_rows, n_cols + 1));
new_x.slice_mut(ndarray::s![.., 0..n_cols]).assign(&x_augmented);
new_x.column_mut(n_cols).assign(&lagged);
x_augmented = new_x;
}
// Drop first 'lags' observations to avoid zeros
let x_aug_trim = x_augmented.slice(ndarray::s![lags.., ..]).to_owned();
let u_trim = residuals.slice(ndarray::s![lags..]).to_owned();
let n_trim = u_trim.len();
// Auxiliary regression: u_t = X*β + γ₁*u_{t-1} + ... + γₚ*u_{t-p} + error
let x_aug_t = x_aug_trim.t();
let xtx_aug = x_aug_t.dot(&x_aug_trim);
let xtx_aug_inv: Array2<f64> = match xtx_aug.inv() {
Ok(inv) => inv,
Err(_) => return Err("Singular matrix in Breusch-Godfrey test".to_string()),
};
let xty_aug = x_aug_t.dot(&u_trim);
let beta_aug: Array1<f64> = xtx_aug_inv.dot(&xty_aug);
let fitted_aug: Array1<f64> = x_aug_trim.dot(&beta_aug);
// Calculate R² for auxiliary regression
let mean_u = u_trim.mean().unwrap_or(0.0);
let tss = u_trim.iter().map(|&u| (u - mean_u).powi(2)).sum::<f64>();
let rss = fitted_aug
.iter()
.zip(u_trim.iter())
.map(|(&f, &u)| (u - f).powi(2))
.sum::<f64>();
let r_squared = 1.0 - rss / tss;
// LM statistic: n * R²
let lm_stat = (n_trim as f64) * r_squared;
// Degrees of freedom = number of lags
let df = lags;
// Under H₀, LM ~ χ²(lags)
let chi2_dist = ChiSquared::new(df as f64).map_err(|e| e.to_string())?;
let p_value = 1.0 - chi2_dist.cdf(lm_stat);
Ok((lm_stat, p_value, df))
}
/// Goldfeld-Quandt Test for Heteroskedasticity
///
/// Tests H₀: Homoskedasticity vs H₁: Variance increases with ordering variable
///
/// # Arguments
/// * `residuals` - Residuals from OLS regression (should be ordered by suspected variable)
/// * `split_fraction` - Fraction of middle observations to drop (typically 0.2 to 0.33)
///
/// # Returns
/// Tuple of (F_statistic, p_value, df1, df2)
///
/// # Interpretation
/// - If p < 0.05: Reject H₀, heteroskedasticity detected
/// - If p > 0.05: Fail to reject H₀, homoskedasticity plausible
pub fn goldfeld_quandt_test(
residuals: &Array1<f64>,
split_fraction: f64,
) -> Result<(f64, f64, usize, usize), String> {
let n = residuals.len();
let drop_n = (n as f64 * split_fraction) as usize;
let group_size = (n - drop_n) / 2;
if group_size < 2 {
return Err("Insufficient observations for Goldfeld-Quandt test".to_string());
}
// First group: observations 0 to group_size-1
let group1 = residuals.slice(ndarray::s![0..group_size]);
let ssr1: f64 = group1.iter().map(|&r| r.powi(2)).sum();
// Second group: observations n-group_size to n-1
let group2 = residuals.slice(ndarray::s![(n - group_size)..]);
let ssr2: f64 = group2.iter().map(|&r| r.powi(2)).sum();
// F-statistic: ratio of variances (larger / smaller)
let f_stat = if ssr2 > ssr1 {
ssr2 / ssr1
} else {
ssr1 / ssr2
};
let df1 = group_size;
let df2 = group_size;
// P-value (two-tailed)
let f_dist = FisherSnedecor::new(df1 as f64, df2 as f64).map_err(|e| e.to_string())?;
let p_value = 2.0 * (1.0 - f_dist.cdf(f_stat)).min(f_dist.cdf(f_stat));
Ok((f_stat, p_value, df1, df2))
}
/// Pretty print specification test results
pub fn print_test_result(
test_name: &str,
statistic: f64,
p_value: f64,
null_hypothesis: &str,
alternative_hypothesis: &str,
) {
println!("\n{:=^80}", format!(" {} ", test_name));
println!("{:-^80}", "");
println!("H₀: {}", null_hypothesis);
println!("H₁: {}", alternative_hypothesis);
println!("\nTest Statistic: {:.4}", statistic);
println!("P-value: {:.6}", p_value);
if p_value < 0.01 {
println!("\n✅ REJECT H₀ at 1% level (p < 0.01)");
println!(" → Strong evidence for H₁");
} else if p_value < 0.05 {
println!("\n✅ REJECT H₀ at 5% level (p < 0.05)");
println!(" → Evidence for H₁");
} else if p_value < 0.10 {
println!("\n⚠️ MARGINALLY REJECT H₀ at 10% level (p < 0.10)");
println!(" → Weak evidence for H₁");
} else {
println!("\n❌ FAIL TO REJECT H₀ (p > 0.10)");
println!(" → No evidence against H₀");
}
println!("{:=^80}", "");
}
}