1use ndarray::Array1;
4use serde::{Deserialize, Serialize};
5use statrs::distribution::{ContinuousCDF, Normal};
6
7use crate::glm::family::{Family, Link};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct GLMResults {
12 pub coefficients: Array1<f64>,
14 pub std_errors: Array1<f64>,
16 pub z_values: Array1<f64>,
18 pub p_values: Array1<f64>,
20 pub fitted_values: Array1<f64>,
22 pub residuals: Array1<f64>,
24 pub pearson_residuals: Array1<f64>,
26 pub hat_matrix_diag: Array1<f64>,
28 pub scale: f64,
30 pub deviance: f64,
32 pub null_deviance: f64,
34 pub df_residual: usize,
36 pub df_null: usize,
38 pub aic: f64,
40 pub bic: f64,
42 pub converged: bool,
44 pub iterations: usize,
46 pub family: Family,
48 pub link: Link,
50 pub intercept: bool,
52 pub n_obs: usize,
54 pub n_params: usize,
56}
57
58impl GLMResults {
59 pub fn summary(&self, feature_names: &[String]) -> String {
61 let n_coef = self.coefficients.len();
62 let intercept_included = self.intercept;
63
64 let mut summary = String::new();
65
66 summary.push_str("Generalized Linear Model Results\n");
68 summary.push_str("================================\n");
69 summary.push_str(&format!(
70 "Family: {} ({})\n",
71 self.family.name(),
72 self.link.name()
73 ));
74 summary.push_str(&format!("Link: {}\n", self.link.name()));
75 summary.push_str(&format!("Number of observations: {}\n", self.n_obs));
76 summary.push_str(&format!(
77 "Degrees of Freedom: {} total, {} residual\n",
78 self.n_params, self.df_residual
79 ));
80 summary.push_str(&format!("Scale (dispersion): {:.4}\n", self.scale));
81
82 summary.push_str("\nDeviance Residuals:\n");
84 summary.push_str(&format!(
85 " Null deviance: {:.4} on {} df\n",
86 self.null_deviance, self.df_null
87 ));
88 summary.push_str(&format!(
89 "Residual deviance: {:.4} on {} df\n",
90 self.deviance, self.df_residual
91 ));
92
93 if self.null_deviance > 0.0 {
94 let pseudo_r2 = 1.0 - self.deviance / self.null_deviance;
95 summary.push_str(&format!("Pseudo R-squared: {:.4}\n", pseudo_r2));
96 }
97
98 summary.push_str(&format!("AIC: {:.2}, BIC: {:.2}\n", self.aic, self.bic));
99
100 summary.push_str(&format!(
102 "\nIRLS converged: {} ({} iterations)\n",
103 self.converged, self.iterations
104 ));
105
106 summary.push_str("\nCoefficients:\n");
108 summary.push_str(&format!(
109 "{:>20} {:>10} {:>10} {:>10} {:>10}\n",
110 " ", "Estimate", "Std. Error", "z value", "Pr(>|z|)"
111 ));
112 summary.push_str(&format!("{}\n", "-".repeat(60)));
113
114 for i in 0..n_coef {
115 let name = if i == 0 && intercept_included {
116 "(Intercept)".to_string()
117 } else if intercept_included {
118 feature_names
119 .get(i)
120 .cloned()
121 .unwrap_or_else(|| format!("x{}", i))
122 } else {
123 feature_names
124 .get(i)
125 .cloned()
126 .unwrap_or_else(|| format!("x{}", i))
127 };
128
129 let coef = self.coefficients[i];
130 let se = self.std_errors[i];
131 let z = self.z_values[i];
132 let p = self.p_values[i];
133
134 let significance = if p < 0.001 {
135 "***"
136 } else if p < 0.01 {
137 "**"
138 } else if p < 0.05 {
139 "*"
140 } else if p < 0.1 {
141 "."
142 } else {
143 ""
144 };
145
146 summary.push_str(&format!(
147 "{:>20} {:>10.4} {:>10.4} {:>10.4} {:>10.4} {}\n",
148 name, coef, se, z, p, significance
149 ));
150 }
151
152 summary.push_str(&format!("{}\n", "-".repeat(60)));
153 summary.push_str("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n");
154
155 if (self.family == Family::Poisson || self.family == Family::Binomial)
157 && (self.scale - 1.0).abs() > 0.1
158 {
159 summary.push_str(&format!(
160 "\nWarning: Dispersion parameter is {:.3}, not 1.\n",
161 self.scale
162 ));
163 summary.push_str(&format!(
164 "Consider using quasi-{} family.\n",
165 self.family.name().to_lowercase()
166 ));
167 }
168
169 summary
170 }
171
172 pub fn coefficients_with_names(&self, feature_names: &[String]) -> Vec<(String, f64)> {
174 let n_coef = self.coefficients.len();
175 let mut result = Vec::with_capacity(n_coef);
176
177 for i in 0..n_coef {
178 let name = if i == 0 && self.intercept {
179 "(Intercept)".to_string()
180 } else if self.intercept {
181 feature_names
182 .get(i)
183 .cloned()
184 .unwrap_or_else(|| format!("x{}", i))
185 } else {
186 feature_names
187 .get(i)
188 .cloned()
189 .unwrap_or_else(|| format!("x{}", i))
190 };
191
192 result.push((name, self.coefficients[i]));
193 }
194
195 result
196 }
197
198 pub fn confidence_intervals(
200 &self,
201 alpha: f64,
202 feature_names: &[String],
203 ) -> Vec<(String, f64, f64)> {
204 let n_coef = self.coefficients.len();
205 let z_critical = 1.0 - alpha / 2.0;
206 let z_value = Normal::new(0.0, 1.0).unwrap().inverse_cdf(z_critical);
207
208 let mut intervals = Vec::with_capacity(n_coef);
209
210 for i in 0..n_coef {
211 let name = if i == 0 && self.intercept {
212 "(Intercept)".to_string()
213 } else if self.intercept {
214 feature_names
215 .get(i)
216 .cloned()
217 .unwrap_or_else(|| format!("x{}", i))
218 } else {
219 feature_names
220 .get(i)
221 .cloned()
222 .unwrap_or_else(|| format!("x{}", i))
223 };
224
225 let coef = self.coefficients[i];
226 let se = self.std_errors[i];
227
228 let lower = coef - z_value * se;
229 let upper = coef + z_value * se;
230
231 intervals.push((name, lower, upper));
232 }
233
234 intervals
235 }
236
237 pub fn deviance_residuals(&self, y: &Array1<f64>) -> Array1<f64> {
239 y.iter()
240 .zip(self.fitted_values.iter())
241 .map(|(&y_val, &mu_val)| {
242 let unit_deviance = self.family.unit_deviance(y_val, mu_val);
243 let sign = if y_val > mu_val { 1.0 } else { -1.0 };
244 sign * unit_deviance.sqrt()
245 })
246 .collect()
247 }
248
249 pub fn cooks_distance(&self) -> Array1<f64> {
251 let n = self.n_obs;
252 let p = self.n_params;
253
254 let mut cooks = Array1::zeros(n);
255
256 for i in 0..n {
257 let hii = self.hat_matrix_diag[i];
258 let pearson_resid = self.pearson_residuals[i];
259
260 if hii < 1.0 && self.scale > 0.0 {
261 cooks[i] =
262 (pearson_resid.powi(2) / (p as f64 * self.scale)) * (hii / (1.0 - hii).powi(2));
263 }
264 }
265
266 cooks
267 }
268
269 pub fn standardized_pearson_residuals(&self) -> Array1<f64> {
271 let n = self.n_obs;
272 let mut std_residuals = Array1::zeros(n);
273
274 for i in 0..n {
275 let hii = self.hat_matrix_diag[i];
276 let pearson_resid = self.pearson_residuals[i];
277
278 if self.scale > 0.0 && hii < 1.0 {
279 std_residuals[i] = pearson_resid / ((self.scale * (1.0 - hii)).sqrt());
280 }
281 }
282
283 std_residuals
284 }
285
286 pub fn overdispersion_test(&self) -> (f64, f64) {
288 let n = self.n_obs;
289 let p = self.n_params;
290
291 if self.family == Family::Poisson || self.family == Family::Binomial {
292 let pearson_chi2: f64 = self.pearson_residuals.iter().map(|r| r.powi(2)).sum();
293 let df = (n - p) as f64;
294
295 let test_stat = pearson_chi2 / df;
296 let p_value = 1.0 - statrs::function::gamma::gamma_ur(df / 2.0, df / 2.0 * test_stat);
297
298 (test_stat, p_value)
299 } else {
300 (f64::NAN, f64::NAN)
301 }
302 }
303
304 pub fn log_likelihood(&self, y: &Array1<f64>) -> f64 {
306 let n = self.n_obs;
307 let mut ll = 0.0;
308
309 for i in 0..n {
310 let y_val = y[i];
311 let mu_val = self.fitted_values[i];
312
313 match self.family {
315 Family::Gaussian => {
316 ll += -0.5 * (y_val - mu_val).powi(2) / self.scale;
317 }
318 Family::Binomial => {
319 if y_val == 0.0 {
320 ll += (1.0 - mu_val).ln().max(-100.0);
321 } else if y_val == 1.0 {
322 ll += mu_val.ln().max(-100.0);
323 } else {
324 ll += y_val * mu_val.ln().max(-100.0)
325 + (1.0 - y_val) * (1.0 - mu_val).ln().max(-100.0);
326 }
327 }
328 Family::Poisson => {
329 ll += y_val * mu_val.ln().max(-100.0) - mu_val;
330 }
331 Family::Gamma => {
332 let shape = 1.0 / self.scale;
334 ll += shape * mu_val.ln().max(-100.0)
335 - shape * y_val / mu_val
336 - shape.ln().max(-100.0)
337 - (shape - 1.0) * y_val.ln().max(-100.0);
338 }
339 Family::InverseGaussian => {
340 ll += -0.5 * (y_val - mu_val).powi(2) / (self.scale * mu_val.powi(2) * y_val);
342 }
343 }
344 }
345
346 ll
347 }
348
349 pub fn pseudo_r_squared(&self) -> (f64, f64, f64) {
351 let mcfadden = 1.0 - self.deviance / self.null_deviance;
353
354 let cox_snell = 1.0 - (self.deviance / self.null_deviance).powf(2.0 / self.n_obs as f64);
356
357 let nagelkerke = cox_snell / (1.0 - (-2.0 * self.null_deviance / self.n_obs as f64).exp());
359
360 (mcfadden, cox_snell, nagelkerke)
361 }
362
363 pub fn likelihood_ratio_test(&self) -> (f64, f64) {
365 let lr_stat = self.null_deviance - self.deviance;
366 let df = (self.n_params - if self.intercept { 1 } else { 0 }) as f64;
367
368 let p_value = 1.0 - statrs::function::gamma::gamma_ur(df / 2.0, lr_stat / 2.0);
369
370 (lr_stat, p_value)
371 }
372
373 pub fn model_info(&self) -> String {
375 format!(
376 "GLM(family={}, link={}, intercept={}, n_obs={}, n_params={})",
377 self.family.name(),
378 self.link.name(),
379 self.intercept,
380 self.n_obs,
381 self.n_params
382 )
383 }
384}