use crate::{CovarianceType, GreenersError, OLS, DataFrame, Formula};
use ndarray::{Array1, Array2, Axis};
use std::collections::HashMap;
use std::fmt;
use std::hash::Hash;
#[derive(Debug)]
pub struct PanelResult {
pub params: Array1<f64>,
pub std_errors: Array1<f64>,
pub t_values: Array1<f64>,
pub p_values: Array1<f64>,
pub r_squared: f64, pub n_obs: usize,
pub n_entities: usize, pub df_resid: usize, pub sigma: f64,
}
impl fmt::Display for PanelResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "\n{:=^78}", " Fixed Effects (Within) Regression ")?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4}",
"Dep. Variable:", "y", "Within R-sq:", self.r_squared
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15}",
"Estimator:", "Fixed Effects", "No. Entities:", self.n_entities
)?;
writeln!(
f,
"{:<20} {:>15} || {:<20} {:>15.4e}",
"No. Observations:", self.n_obs, "Sigma:", self.sigma
)?;
writeln!(f, "\n{:-^78}", "")?;
writeln!(
f,
"{:<10} | {:>10} | {:>10} | {:>8} | {:>8}",
"Variable", "coef", "std err", "t", "P>|t|"
)?;
writeln!(f, "{:-^78}", "")?;
for i in 0..self.params.len() {
writeln!(
f,
"x{:<9} | {:>10.4} | {:>10.4} | {:>8.3} | {:>8.3}",
i, self.params[i], self.std_errors[i], self.t_values[i], self.p_values[i]
)?;
}
writeln!(f, "{:=^78}", "")
}
}
pub struct FixedEffects;
impl FixedEffects {
pub fn from_formula<T>(
formula: &Formula,
data: &DataFrame,
entity_ids: &[T],
) -> Result<PanelResult, GreenersError>
where
T: Eq + Hash + Clone,
{
let (y, x) = data.to_design_matrix(formula)?;
Self::fit(&y, &x, entity_ids)
}
fn within_transform<T>(data: &Array2<f64>, groups: &[T]) -> Result<Array2<f64>, GreenersError>
where
T: Eq + Hash + Clone,
{
let n_rows = data.nrows();
let n_cols = data.ncols();
if n_rows != groups.len() {
return Err(GreenersError::ShapeMismatch(
"Data rows and Group IDs length mismatch".into(),
));
}
let mut group_sums: HashMap<T, Array1<f64>> = HashMap::new();
let mut group_counts: HashMap<T, usize> = HashMap::new();
for (i, group_id) in groups.iter().enumerate() {
let row = data.row(i).to_owned();
group_sums
.entry(group_id.clone())
.and_modify(|sum| *sum = &*sum + &row)
.or_insert(row);
*group_counts.entry(group_id.clone()).or_insert(0) += 1;
}
let mut transformed_data = Array2::zeros((n_rows, n_cols));
for (i, group_id) in groups.iter().enumerate() {
let sum = &group_sums[group_id];
let count = group_counts[group_id] as f64;
let mean = sum / count;
let original_row = data.row(i);
let demeaned_row = &original_row - &mean;
transformed_data.row_mut(i).assign(&demeaned_row);
}
Ok(transformed_data)
}
pub fn fit<T>(
y: &Array1<f64>,
x: &Array2<f64>,
groups: &[T],
) -> Result<PanelResult, GreenersError>
where
T: Eq + Hash + Clone,
{
let n = x.nrows();
let y_mat = y.view().insert_axis(Axis(1)).to_owned();
let y_demeaned_mat = Self::within_transform(&y_mat, groups)?;
let x_demeaned = Self::within_transform(x, groups)?;
let y_demeaned = y_demeaned_mat.column(0).to_owned();
let ols_result = OLS::fit(&y_demeaned, &x_demeaned, CovarianceType::NonRobust)?;
let mut unique_groups: HashMap<T, bool> = HashMap::new();
for g in groups {
unique_groups.insert(g.clone(), true);
}
let n_entities = unique_groups.len();
let k = x.ncols();
let df_resid_correct = n - k - (n_entities - 1);
if df_resid_correct == 0 {
return Err(GreenersError::ShapeMismatch(
"Not enough degrees of freedom for Fixed Effects".into(),
));
}
let residuals = &y_demeaned - &x_demeaned.dot(&ols_result.params);
let ssr = residuals.dot(&residuals);
let sigma2 = ssr / (df_resid_correct as f64);
let sigma = sigma2.sqrt();
let adjustment_factor = (ols_result.df_resid as f64) / (df_resid_correct as f64);
let old_vars = ols_result.std_errors.mapv(|se| se.powi(2));
let new_vars = old_vars * adjustment_factor;
let std_errors = new_vars.mapv(f64::sqrt);
let t_values = &ols_result.params / &std_errors;
Ok(PanelResult {
params: ols_result.params,
std_errors,
t_values,
p_values: ols_result.p_values,
r_squared: ols_result.r_squared,
n_obs: n,
n_entities,
df_resid: df_resid_correct,
sigma,
})
}
}
#[derive(Debug)]
pub struct RandomEffectsResult {
pub params: Array1<f64>,
pub std_errors: Array1<f64>,
pub t_values: Array1<f64>,
pub p_values: Array1<f64>,
pub r_squared_overall: f64,
pub sigma_u: f64, pub sigma_e: f64, pub theta: f64, }
impl fmt::Display for RandomEffectsResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "\n{:=^78}", " Random Effects (GLS) - Swamy-Arora ")?;
writeln!(
f,
"{:<20} {:>15.4}",
"R-squared (Over):", self.r_squared_overall
)?;
writeln!(f, "{:<20} {:>15.4}", "Theta:", self.theta)?;
writeln!(
f,
"{:<20} {:>15.4}",
"Sigma Alpha (Ind):", self.sigma_e
)?;
writeln!(
f,
"{:<20} {:>15.4}",
"Sigma U (Idiosync):", self.sigma_u
)?;
writeln!(f, "\n{:-^78}", "")?;
writeln!(
f,
"{:<10} | {:>10} | {:>10} | {:>8} | {:>8}",
"Variable", "Coef", "Std Err", "t", "P>|t|"
)?;
writeln!(f, "{:-^78}", "")?;
for i in 0..self.params.len() {
writeln!(
f,
"x{:<9} | {:>10.4} | {:>10.4} | {:>8.3} | {:>8.3}",
i, self.params[i], self.std_errors[i], self.t_values[i], self.p_values[i]
)?;
}
writeln!(f, "{:=^78}", "")
}
}
pub struct RandomEffects;
impl RandomEffects {
pub fn from_formula(
formula: &Formula,
data: &DataFrame,
entity_ids: &Array1<i64>,
) -> Result<RandomEffectsResult, GreenersError> {
let (y, x) = data.to_design_matrix(formula)?;
Self::fit(&y, &x, entity_ids)
}
pub fn fit(
y: &Array1<f64>,
x: &Array2<f64>,
entity_ids: &Array1<i64>, ) -> Result<RandomEffectsResult, GreenersError> {
let n_obs = y.len();
let k = x.ncols();
if entity_ids.len() != n_obs {
return Err(GreenersError::ShapeMismatch(
"Entity IDs length mismatch".into(),
));
}
let mut groups: HashMap<i64, Vec<usize>> = HashMap::new();
for (idx, &id) in entity_ids.iter().enumerate() {
groups.entry(id).or_insert(Vec::new()).push(idx);
}
let n_entities = groups.len();
let t_bar = (n_obs as f64) / (n_entities as f64);
let mut y_within = y.clone();
let mut x_within = x.clone();
let mut y_means = Vec::new();
let mut x_means = Vec::new();
for indices in groups.values() {
let t_i = indices.len() as f64;
let mut y_sum = 0.0;
let mut x_sum = Array1::<f64>::zeros(k);
for &idx in indices {
y_sum += y[idx];
x_sum = &x_sum + &x.row(idx);
}
let y_mean = y_sum / t_i;
let x_mean = x_sum / t_i;
y_means.push(y_mean);
for val in x_mean.iter() {
x_means.push(*val);
}
for &idx in indices {
y_within[idx] -= y_mean;
let mut row = x_within.row_mut(idx);
row -= &x_mean;
}
}
let mut keep_indices = Vec::new();
for j in 0..k {
let col = x_within.column(j);
let variance = col.var(0.0); if variance > 1e-12 {
keep_indices.push(j);
}
}
let x_within_clean = x_within.select(Axis(1), &keep_indices);
let fe_model = OLS::fit(&y_within, &x_within_clean, CovarianceType::NonRobust)?;
let residuals_fe = &y_within - &x_within_clean.dot(&fe_model.params);
let ssr_within = residuals_fe.mapv(|v| v.powi(2)).sum();
let k_eff = keep_indices.len();
let df_resid_within = (n_obs as f64 - n_entities as f64 - k_eff as f64).max(1.0);
let sigma_u_sq = ssr_within / df_resid_within;
let y_between_arr = Array1::from(y_means);
let x_between_arr = Array2::from_shape_vec((n_entities, k), x_means)
.map_err(|e| GreenersError::ShapeMismatch(e.to_string()))?;
let be_model = OLS::fit(
&y_between_arr,
&x_between_arr,
CovarianceType::NonRobust,
)?;
let residuals_be = &y_between_arr - &x_between_arr.dot(&be_model.params);
let ssr_between = residuals_be.mapv(|v| v.powi(2)).sum();
let df_resid_between = (n_entities as f64 - k as f64).max(1.0);
let sigma_b_sq = ssr_between / df_resid_between;
let sigma_e_sq = (sigma_b_sq - (sigma_u_sq / t_bar)).max(0.0);
let theta = 1.0 - (sigma_u_sq / (sigma_u_sq + t_bar * sigma_e_sq)).sqrt();
let mut y_gls = y.clone();
let mut x_gls = x.clone();
for indices in groups.values() {
let t_i = indices.len() as f64;
let mut y_sum = 0.0;
let mut x_sum = Array1::<f64>::zeros(k);
for &idx in indices {
y_sum += y[idx];
x_sum = &x_sum + &x.row(idx);
}
let y_mean = y_sum / t_i;
let x_mean = x_sum / t_i;
for &idx in indices {
y_gls[idx] -= theta * y_mean;
let mut row = x_gls.row_mut(idx);
row -= &(&x_mean * theta);
}
}
let final_model = OLS::fit(&y_gls, &x_gls, CovarianceType::NonRobust)?;
let pred_original = x.dot(&final_model.params);
let y_mean_total = y.mean().unwrap();
let sst = (y - y_mean_total).mapv(|v| v.powi(2)).sum();
let ssr = (y - &pred_original).mapv(|v| v.powi(2)).sum();
let r2_overall = 1.0 - (ssr / sst);
Ok(RandomEffectsResult {
params: final_model.params,
std_errors: final_model.std_errors,
t_values: final_model.t_values,
p_values: final_model.p_values,
r_squared_overall: r2_overall,
sigma_u: sigma_u_sq.sqrt(),
sigma_e: sigma_e_sq.sqrt(),
theta,
})
}
}
#[derive(Debug)]
pub struct BetweenResult {
pub params: Array1<f64>,
pub std_errors: Array1<f64>,
pub t_values: Array1<f64>,
pub p_values: Array1<f64>,
pub r_squared: f64,
pub n_entities: usize,
}
impl fmt::Display for BetweenResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "\n{:=^78}", " Between Estimator (Means) ")?;
writeln!(f, "{:<20} {:>15.4}", "R-squared:", self.r_squared)?;
writeln!(f, "{:<20} {:>15}", "No. Entities:", self.n_entities)?;
writeln!(f, "\n{:-^78}", "")?;
writeln!(f, "{:<10} | {:>10} | {:>10} | {:>8} | {:>8}",
"Variable", "Coef", "Std Err", "t", "P>|t|")?;
writeln!(f, "{:-^78}", "")?;
for i in 0..self.params.len() {
writeln!(f, "x{:<9} | {:>10.4} | {:>10.4} | {:>8.3} | {:>8.3}",
i, self.params[i], self.std_errors[i], self.t_values[i], self.p_values[i]
)?;
}
writeln!(f, "{:=^78}", "")
}
}
pub struct BetweenEstimator;
impl BetweenEstimator {
pub fn from_formula(
formula: &Formula,
data: &DataFrame,
entity_ids: &Array1<i64>,
) -> Result<BetweenResult, GreenersError> {
let (y, x) = data.to_design_matrix(formula)?;
Self::fit(&y, &x, entity_ids)
}
pub fn fit(
y: &Array1<f64>,
x: &Array2<f64>,
entity_ids: &Array1<i64>
) -> Result<BetweenResult, GreenersError> {
let n_obs = y.len();
let k = x.ncols();
if entity_ids.len() != n_obs {
return Err(GreenersError::ShapeMismatch("Entity IDs length mismatch".into()));
}
let mut groups: HashMap<i64, Vec<usize>> = HashMap::new();
for (idx, &id) in entity_ids.iter().enumerate() {
groups.entry(id).or_insert(Vec::new()).push(idx);
}
let n_entities = groups.len();
let mut y_means = Vec::with_capacity(n_entities);
let mut x_means = Vec::with_capacity(n_entities * k);
for indices in groups.values() {
let t_i = indices.len() as f64;
let mut y_sum = 0.0;
let mut x_sum = Array1::<f64>::zeros(k);
for &idx in indices {
y_sum += y[idx];
x_sum = &x_sum + &x.row(idx);
}
y_means.push(y_sum / t_i);
let x_mean = x_sum / t_i;
for val in x_mean.iter() {
x_means.push(*val);
}
}
let y_between = Array1::from(y_means);
let x_between = Array2::from_shape_vec((n_entities, k), x_means)
.map_err(|e| GreenersError::ShapeMismatch(e.to_string()))?;
let ols = OLS::fit(&y_between, &x_between, CovarianceType::NonRobust)?;
Ok(BetweenResult {
params: ols.params,
std_errors: ols.std_errors,
t_values: ols.t_values,
p_values: ols.p_values,
r_squared: ols.r_squared,
n_entities,
})
}
}