use crate::error::{Result, TimeSeriesError};
#[derive(Debug, Clone)]
pub struct PanelData {
pub y: Vec<Vec<f64>>,
pub x: Vec<Vec<Vec<f64>>>,
pub n_individuals: usize,
pub n_periods: usize,
pub n_regressors: usize,
}
impl PanelData {
pub fn new(y: Vec<Vec<f64>>, x: Vec<Vec<Vec<f64>>>) -> Result<Self> {
let n = y.len();
if n == 0 {
return Err(TimeSeriesError::InvalidInput(
"PanelData: no individuals (empty y)".into(),
));
}
if x.len() != n {
return Err(TimeSeriesError::InvalidInput(format!(
"PanelData: x has {} individuals but y has {}",
x.len(),
n
)));
}
let t = y[0].len();
if t == 0 {
return Err(TimeSeriesError::InvalidInput(
"PanelData: zero time periods".into(),
));
}
for (i, (yi, xi)) in y.iter().zip(x.iter()).enumerate() {
if yi.len() != t {
return Err(TimeSeriesError::InvalidInput(format!(
"PanelData: individual {} has {} periods, expected {}",
i,
yi.len(),
t
)));
}
if xi.len() != t {
return Err(TimeSeriesError::InvalidInput(format!(
"PanelData: individual {} has x with {} periods, expected {}",
i,
xi.len(),
t
)));
}
}
let k = if x[0].is_empty() || x[0][0].is_empty() {
0
} else {
x[0][0].len()
};
for (i, xi) in x.iter().enumerate() {
for (t_idx, xt) in xi.iter().enumerate() {
if xt.len() != k {
return Err(TimeSeriesError::InvalidInput(format!(
"PanelData: individual {i}, period {t_idx}: {} regressors, expected {k}",
xt.len()
)));
}
}
}
Ok(Self {
n_individuals: n,
n_periods: t,
n_regressors: k,
y,
x,
})
}
}
#[derive(Debug, Clone)]
pub struct FixedEffectsModel {
pub coefficients: Vec<f64>,
pub std_errors: Vec<f64>,
pub t_stats: Vec<f64>,
pub fixed_effects: Vec<f64>,
pub sigma2: f64,
pub r_squared: f64,
pub n_individuals: usize,
pub n_periods: usize,
pub n_regressors: usize,
pub(crate) xtx_inv: Vec<f64>,
}
impl FixedEffectsModel {
pub fn fit(data: &PanelData) -> Result<Self> {
let n = data.n_individuals;
let t = data.n_periods;
let k = data.n_regressors;
if n * t < k + n + 2 {
return Err(TimeSeriesError::InsufficientData {
message: "FE: too few observations for model identification".into(),
required: k + n + 2,
actual: n * t,
});
}
let mut y_mean = vec![0.0_f64; n];
let mut x_mean = vec![vec![0.0_f64; k]; n];
for i in 0..n {
y_mean[i] = data.y[i].iter().sum::<f64>() / t as f64;
for j in 0..k {
x_mean[i][j] =
data.x[i].iter().map(|xt| xt[j]).sum::<f64>() / t as f64;
}
}
let n_obs = n * t;
let mut y_tilde = Vec::with_capacity(n_obs);
let mut x_tilde = Vec::with_capacity(n_obs * k);
for i in 0..n {
for tt in 0..t {
y_tilde.push(data.y[i][tt] - y_mean[i]);
for j in 0..k {
x_tilde.push(data.x[i][tt][j] - x_mean[i][j]);
}
}
}
let mut xtx = vec![0.0_f64; k * k];
let mut xty = vec![0.0_f64; k];
for obs in 0..n_obs {
for a in 0..k {
xty[a] += x_tilde[obs * k + a] * y_tilde[obs];
for b in 0..k {
xtx[a * k + b] += x_tilde[obs * k + a] * x_tilde[obs * k + b];
}
}
}
let xtx_inv = invert_sym(k, &xtx)?;
let beta = mat_vec_mul(k, &xtx_inv, &xty);
let mut fixed_effects = Vec::with_capacity(n);
for i in 0..n {
let xb: f64 = (0..k).map(|j| x_mean[i][j] * beta[j]).sum();
fixed_effects.push(y_mean[i] - xb);
}
let df = n_obs as f64 - n as f64 - k as f64;
let df = df.max(1.0);
let mut ss_res = 0.0_f64;
let mut ss_tot = 0.0_f64;
let grand_mean_y: f64 = y_tilde.iter().sum::<f64>();
for obs in 0..n_obs {
let xb: f64 = (0..k).map(|j| x_tilde[obs * k + j] * beta[j]).sum();
let resid = y_tilde[obs] - xb;
ss_res += resid * resid;
ss_tot += (y_tilde[obs] - grand_mean_y).powi(2);
}
let sigma2 = ss_res / df;
let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
let std_errors: Vec<f64> = (0..k)
.map(|j| (sigma2 * xtx_inv[j * k + j]).sqrt())
.collect();
let t_stats: Vec<f64> = beta
.iter()
.zip(std_errors.iter())
.map(|(&b, &se)| if se > 0.0 { b / se } else { 0.0 })
.collect();
Ok(Self {
coefficients: beta,
std_errors,
t_stats,
fixed_effects,
sigma2,
r_squared,
n_individuals: n,
n_periods: t,
n_regressors: k,
xtx_inv,
})
}
pub fn predict(&self, data: &PanelData) -> Result<Vec<Vec<f64>>> {
let n = data.n_individuals;
let t = data.n_periods;
let k = data.n_regressors;
if k != self.n_regressors {
return Err(TimeSeriesError::InvalidInput(format!(
"predict: expected {} regressors, got {}",
self.n_regressors, k
)));
}
if n != self.n_individuals {
return Err(TimeSeriesError::InvalidInput(format!(
"predict: expected {} individuals, got {}",
self.n_individuals, n
)));
}
let mut fitted = vec![vec![0.0_f64; t]; n];
for i in 0..n {
let alpha_i = if i < self.fixed_effects.len() {
self.fixed_effects[i]
} else {
0.0
};
for tt in 0..t {
let xb: f64 = (0..k).map(|j| data.x[i][tt][j] * self.coefficients[j]).sum();
fitted[i][tt] = alpha_i + xb;
}
}
Ok(fitted)
}
}
pub(crate) fn invert_sym(k: usize, a: &[f64]) -> Result<Vec<f64>> {
if k == 0 {
return Ok(Vec::new());
}
let mut l = vec![0.0_f64; k * k];
for i in 0..k {
for j in 0..=i {
let mut sum: f64 = a[i * k + j];
for p in 0..j {
sum -= l[i * k + p] * l[j * k + p];
}
if i == j {
if sum <= 0.0 {
return Err(TimeSeriesError::NumericalError(
"FE OLS: Cholesky failure — X̃'X̃ is not positive-definite (possible multicollinearity)".into(),
));
}
l[i * k + i] = sum.sqrt();
} else {
l[i * k + j] = sum / l[j * k + j];
}
}
}
let mut l_inv = vec![0.0_f64; k * k];
for col in 0..k {
l_inv[col * k + col] = 1.0 / l[col * k + col];
for row in (col + 1)..k {
let mut sum = 0.0_f64;
for p in col..row {
sum += l[row * k + p] * l_inv[p * k + col];
}
l_inv[row * k + col] = -sum / l[row * k + row];
}
}
let mut a_inv = vec![0.0_f64; k * k];
for i in 0..k {
for j in 0..k {
let mut val = 0.0_f64;
for p in i.max(j)..k {
val += l_inv[p * k + i] * l_inv[p * k + j];
}
a_inv[i * k + j] = val;
}
}
Ok(a_inv)
}
pub(crate) fn mat_vec_mul(k: usize, m: &[f64], v: &[f64]) -> Vec<f64> {
(0..k)
.map(|i| (0..k).map(|j| m[i * k + j] * v[j]).sum())
.collect()
}
pub(crate) fn mat_mul(k: usize, a: &[f64], b: &[f64]) -> Vec<f64> {
let mut c = vec![0.0_f64; k * k];
for i in 0..k {
for j in 0..k {
for p in 0..k {
c[i * k + j] += a[i * k + p] * b[p * k + j];
}
}
}
c
}
pub(crate) fn mat_sub(k: usize, a: &[f64], b: &[f64]) -> Vec<f64> {
a.iter().zip(b.iter()).map(|(&ai, &bi)| ai - bi).collect()
}
#[cfg(test)]
mod tests {
use super::*;
fn make_panel(n: usize, t: usize) -> (Vec<Vec<f64>>, Vec<Vec<Vec<f64>>>) {
let alpha: Vec<f64> = (0..n).map(|i| i as f64 * 0.5).collect();
let mut y = vec![vec![0.0; t]; n];
let mut x = vec![vec![vec![0.0; 1]; t]; n];
for i in 0..n {
for tt in 0..t {
let xit = (i as f64 + tt as f64 * 0.3).sin() + 1.0;
x[i][tt][0] = xit;
y[i][tt] = alpha[i] + 2.0 * xit;
}
}
(y, x)
}
#[test]
fn test_panel_data_new_valid() {
let (y, x) = make_panel(4, 5);
let data = PanelData::new(y, x).expect("Should create PanelData");
assert_eq!(data.n_individuals, 4);
assert_eq!(data.n_periods, 5);
assert_eq!(data.n_regressors, 1);
}
#[test]
fn test_panel_data_mismatched_lengths() {
let y = vec![vec![1.0, 2.0], vec![3.0]]; let x = vec![vec![vec![1.0]; 2], vec![vec![1.0]; 2]];
assert!(PanelData::new(y, x).is_err());
}
#[test]
fn test_fe_fit_recovers_true_beta() {
let (y, x) = make_panel(10, 8);
let data = PanelData::new(y, x).expect("PanelData");
let model = FixedEffectsModel::fit(&data).expect("FE fit");
assert!(
(model.coefficients[0] - 2.0).abs() < 1e-6,
"β̂ = {:.6}, expected 2.0",
model.coefficients[0]
);
}
#[test]
fn test_fe_fixed_effects_consistency() {
let (y, x) = make_panel(5, 6);
let data = PanelData::new(y, x).expect("PanelData");
let model = FixedEffectsModel::fit(&data).expect("FE fit");
assert_eq!(model.fixed_effects.len(), 5);
for (i, &fe) in model.fixed_effects.iter().enumerate() {
let true_alpha = i as f64 * 0.5;
assert!(
(fe - true_alpha).abs() < 1e-5,
"FE[{i}] = {fe:.4}, expected {true_alpha:.4}"
);
}
}
#[test]
fn test_fe_predict_shape() {
let (y, x) = make_panel(4, 5);
let data = PanelData::new(y, x).expect("PanelData");
let model = FixedEffectsModel::fit(&data).expect("FE fit");
let fitted = model.predict(&data).expect("predict");
assert_eq!(fitted.len(), 4);
assert_eq!(fitted[0].len(), 5);
}
}