// so_models/glm/results.rs
1//! GLM results structure and diagnostics
2
3use ndarray::Array1;
4use serde::{Deserialize, Serialize};
5use statrs::distribution::{ContinuousCDF, Normal};
6
7use crate::glm::family::{Family, Link};
8
/// Results from fitting a Generalized Linear Model
///
/// Holds the point estimates, Wald-test statistics, residual diagnostics,
/// and fit metadata produced by the IRLS fitting routine. All per-observation
/// arrays (`fitted_values`, `residuals`, `pearson_residuals`,
/// `hat_matrix_diag`) have length `n_obs`; all per-coefficient arrays
/// (`coefficients`, `std_errors`, `z_values`, `p_values`) have length
/// `n_params`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GLMResults {
    /// Estimated coefficients (intercept first when `intercept` is true)
    pub coefficients: Array1<f64>,
    /// Standard errors of coefficients
    pub std_errors: Array1<f64>,
    /// z-statistics for coefficients (Wald test)
    pub z_values: Array1<f64>,
    /// p-values for coefficients
    pub p_values: Array1<f64>,
    /// Fitted values (on response scale)
    pub fitted_values: Array1<f64>,
    /// Residuals (response scale)
    pub residuals: Array1<f64>,
    /// Pearson residuals
    pub pearson_residuals: Array1<f64>,
    /// Diagonal of hat matrix (leverage)
    pub hat_matrix_diag: Array1<f64>,
    /// Scale/dispersion parameter (fixed at 1 for Poisson/Binomial in theory;
    /// the fitted estimate may differ, which the summary flags)
    pub scale: f64,
    /// Residual deviance
    pub deviance: f64,
    /// Null deviance (deviance of the intercept-only model)
    pub null_deviance: f64,
    /// Residual degrees of freedom
    pub df_residual: usize,
    /// Null degrees of freedom
    pub df_null: usize,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
    /// Whether IRLS converged
    pub converged: bool,
    /// Number of IRLS iterations
    pub iterations: usize,
    /// Distribution family
    pub family: Family,
    /// Link function
    pub link: Link,
    /// Whether intercept was included
    pub intercept: bool,
    /// Number of observations
    pub n_obs: usize,
    /// Number of parameters
    pub n_params: usize,
}
57
58impl GLMResults {
59    /// Create a summary string similar to R's summary.glm()
60    pub fn summary(&self, feature_names: &[String]) -> String {
61        let n_coef = self.coefficients.len();
62        let intercept_included = self.intercept;
63
64        let mut summary = String::new();
65
66        // Header
67        summary.push_str("Generalized Linear Model Results\n");
68        summary.push_str("================================\n");
69        summary.push_str(&format!(
70            "Family: {} ({})\n",
71            self.family.name(),
72            self.link.name()
73        ));
74        summary.push_str(&format!("Link: {}\n", self.link.name()));
75        summary.push_str(&format!("Number of observations: {}\n", self.n_obs));
76        summary.push_str(&format!(
77            "Degrees of Freedom: {} total, {} residual\n",
78            self.n_params, self.df_residual
79        ));
80        summary.push_str(&format!("Scale (dispersion): {:.4}\n", self.scale));
81
82        // Deviance table
83        summary.push_str("\nDeviance Residuals:\n");
84        summary.push_str(&format!(
85            "    Null deviance: {:.4} on {} df\n",
86            self.null_deviance, self.df_null
87        ));
88        summary.push_str(&format!(
89            "Residual deviance: {:.4} on {} df\n",
90            self.deviance, self.df_residual
91        ));
92
93        if self.null_deviance > 0.0 {
94            let pseudo_r2 = 1.0 - self.deviance / self.null_deviance;
95            summary.push_str(&format!("Pseudo R-squared: {:.4}\n", pseudo_r2));
96        }
97
98        summary.push_str(&format!("AIC: {:.2}, BIC: {:.2}\n", self.aic, self.bic));
99
100        // Convergence info
101        summary.push_str(&format!(
102            "\nIRLS converged: {} ({} iterations)\n",
103            self.converged, self.iterations
104        ));
105
106        // Coefficients table
107        summary.push_str("\nCoefficients:\n");
108        summary.push_str(&format!(
109            "{:>20} {:>10} {:>10} {:>10} {:>10}\n",
110            " ", "Estimate", "Std. Error", "z value", "Pr(>|z|)"
111        ));
112        summary.push_str(&format!("{}\n", "-".repeat(60)));
113
114        for i in 0..n_coef {
115            let name = if i == 0 && intercept_included {
116                "(Intercept)".to_string()
117            } else if intercept_included {
118                feature_names
119                    .get(i)
120                    .cloned()
121                    .unwrap_or_else(|| format!("x{}", i))
122            } else {
123                feature_names
124                    .get(i)
125                    .cloned()
126                    .unwrap_or_else(|| format!("x{}", i))
127            };
128
129            let coef = self.coefficients[i];
130            let se = self.std_errors[i];
131            let z = self.z_values[i];
132            let p = self.p_values[i];
133
134            let significance = if p < 0.001 {
135                "***"
136            } else if p < 0.01 {
137                "**"
138            } else if p < 0.05 {
139                "*"
140            } else if p < 0.1 {
141                "."
142            } else {
143                ""
144            };
145
146            summary.push_str(&format!(
147                "{:>20} {:>10.4} {:>10.4} {:>10.4} {:>10.4} {}\n",
148                name, coef, se, z, p, significance
149            ));
150        }
151
152        summary.push_str(&format!("{}\n", "-".repeat(60)));
153        summary.push_str("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n");
154
155        // Dispersion information
156        if (self.family == Family::Poisson || self.family == Family::Binomial)
157            && (self.scale - 1.0).abs() > 0.1
158        {
159            summary.push_str(&format!(
160                "\nWarning: Dispersion parameter is {:.3}, not 1.\n",
161                self.scale
162            ));
163            summary.push_str(&format!(
164                "Consider using quasi-{} family.\n",
165                self.family.name().to_lowercase()
166            ));
167        }
168
169        summary
170    }
171
172    /// Get the coefficients as a vector with names
173    pub fn coefficients_with_names(&self, feature_names: &[String]) -> Vec<(String, f64)> {
174        let n_coef = self.coefficients.len();
175        let mut result = Vec::with_capacity(n_coef);
176
177        for i in 0..n_coef {
178            let name = if i == 0 && self.intercept {
179                "(Intercept)".to_string()
180            } else if self.intercept {
181                feature_names
182                    .get(i)
183                    .cloned()
184                    .unwrap_or_else(|| format!("x{}", i))
185            } else {
186                feature_names
187                    .get(i)
188                    .cloned()
189                    .unwrap_or_else(|| format!("x{}", i))
190            };
191
192            result.push((name, self.coefficients[i]));
193        }
194
195        result
196    }
197
198    /// Get confidence intervals for coefficients
199    pub fn confidence_intervals(
200        &self,
201        alpha: f64,
202        feature_names: &[String],
203    ) -> Vec<(String, f64, f64)> {
204        let n_coef = self.coefficients.len();
205        let z_critical = 1.0 - alpha / 2.0;
206        let z_value = Normal::new(0.0, 1.0).unwrap().inverse_cdf(z_critical);
207
208        let mut intervals = Vec::with_capacity(n_coef);
209
210        for i in 0..n_coef {
211            let name = if i == 0 && self.intercept {
212                "(Intercept)".to_string()
213            } else if self.intercept {
214                feature_names
215                    .get(i)
216                    .cloned()
217                    .unwrap_or_else(|| format!("x{}", i))
218            } else {
219                feature_names
220                    .get(i)
221                    .cloned()
222                    .unwrap_or_else(|| format!("x{}", i))
223            };
224
225            let coef = self.coefficients[i];
226            let se = self.std_errors[i];
227
228            let lower = coef - z_value * se;
229            let upper = coef + z_value * se;
230
231            intervals.push((name, lower, upper));
232        }
233
234        intervals
235    }
236
237    /// Compute deviance residuals
238    pub fn deviance_residuals(&self, y: &Array1<f64>) -> Array1<f64> {
239        y.iter()
240            .zip(self.fitted_values.iter())
241            .map(|(&y_val, &mu_val)| {
242                let unit_deviance = self.family.unit_deviance(y_val, mu_val);
243                let sign = if y_val > mu_val { 1.0 } else { -1.0 };
244                sign * unit_deviance.sqrt()
245            })
246            .collect()
247    }
248
249    /// Compute Cook's distances for influence diagnostics
250    pub fn cooks_distance(&self) -> Array1<f64> {
251        let n = self.n_obs;
252        let p = self.n_params;
253
254        let mut cooks = Array1::zeros(n);
255
256        for i in 0..n {
257            let hii = self.hat_matrix_diag[i];
258            let pearson_resid = self.pearson_residuals[i];
259
260            if hii < 1.0 && self.scale > 0.0 {
261                cooks[i] =
262                    (pearson_resid.powi(2) / (p as f64 * self.scale)) * (hii / (1.0 - hii).powi(2));
263            }
264        }
265
266        cooks
267    }
268
269    /// Compute standardized Pearson residuals
270    pub fn standardized_pearson_residuals(&self) -> Array1<f64> {
271        let n = self.n_obs;
272        let mut std_residuals = Array1::zeros(n);
273
274        for i in 0..n {
275            let hii = self.hat_matrix_diag[i];
276            let pearson_resid = self.pearson_residuals[i];
277
278            if self.scale > 0.0 && hii < 1.0 {
279                std_residuals[i] = pearson_resid / ((self.scale * (1.0 - hii)).sqrt());
280            }
281        }
282
283        std_residuals
284    }
285
286    /// Test for overdispersion (for Poisson and Binomial)
287    pub fn overdispersion_test(&self) -> (f64, f64) {
288        let n = self.n_obs;
289        let p = self.n_params;
290
291        if self.family == Family::Poisson || self.family == Family::Binomial {
292            let pearson_chi2: f64 = self.pearson_residuals.iter().map(|r| r.powi(2)).sum();
293            let df = (n - p) as f64;
294
295            let test_stat = pearson_chi2 / df;
296            let p_value = 1.0 - statrs::function::gamma::gamma_ur(df / 2.0, df / 2.0 * test_stat);
297
298            (test_stat, p_value)
299        } else {
300            (f64::NAN, f64::NAN)
301        }
302    }
303
304    /// Compute log-likelihood (up to a constant)
305    pub fn log_likelihood(&self, y: &Array1<f64>) -> f64 {
306        let n = self.n_obs;
307        let mut ll = 0.0;
308
309        for i in 0..n {
310            let y_val = y[i];
311            let mu_val = self.fitted_values[i];
312
313            // Log-likelihood contributions (up to constant)
314            match self.family {
315                Family::Gaussian => {
316                    ll += -0.5 * (y_val - mu_val).powi(2) / self.scale;
317                }
318                Family::Binomial => {
319                    if y_val == 0.0 {
320                        ll += (1.0 - mu_val).ln().max(-100.0);
321                    } else if y_val == 1.0 {
322                        ll += mu_val.ln().max(-100.0);
323                    } else {
324                        ll += y_val * mu_val.ln().max(-100.0)
325                            + (1.0 - y_val) * (1.0 - mu_val).ln().max(-100.0);
326                    }
327                }
328                Family::Poisson => {
329                    ll += y_val * mu_val.ln().max(-100.0) - mu_val;
330                }
331                Family::Gamma => {
332                    // For Gamma with shape parameter = 1/scale
333                    let shape = 1.0 / self.scale;
334                    ll += shape * mu_val.ln().max(-100.0)
335                        - shape * y_val / mu_val
336                        - shape.ln().max(-100.0)
337                        - (shape - 1.0) * y_val.ln().max(-100.0);
338                }
339                Family::InverseGaussian => {
340                    // For Inverse Gaussian
341                    ll += -0.5 * (y_val - mu_val).powi(2) / (self.scale * mu_val.powi(2) * y_val);
342                }
343            }
344        }
345
346        ll
347    }
348
349    /// Compute pseudo R-squared measures
350    pub fn pseudo_r_squared(&self) -> (f64, f64, f64) {
351        // McFadden's R-squared
352        let mcfadden = 1.0 - self.deviance / self.null_deviance;
353
354        // Cox & Snell R-squared
355        let cox_snell = 1.0 - (self.deviance / self.null_deviance).powf(2.0 / self.n_obs as f64);
356
357        // Nagelkerke R-squared (adjusted Cox & Snell)
358        let nagelkerke = cox_snell / (1.0 - (-2.0 * self.null_deviance / self.n_obs as f64).exp());
359
360        (mcfadden, cox_snell, nagelkerke)
361    }
362
363    /// Perform likelihood ratio test against null model
364    pub fn likelihood_ratio_test(&self) -> (f64, f64) {
365        let lr_stat = self.null_deviance - self.deviance;
366        let df = (self.n_params - if self.intercept { 1 } else { 0 }) as f64;
367
368        let p_value = 1.0 - statrs::function::gamma::gamma_ur(df / 2.0, lr_stat / 2.0);
369
370        (lr_stat, p_value)
371    }
372
373    /// Get model information as a string
374    pub fn model_info(&self) -> String {
375        format!(
376            "GLM(family={}, link={}, intercept={}, n_obs={}, n_params={})",
377            self.family.name(),
378            self.link.name(),
379            self.intercept,
380            self.n_obs,
381            self.n_params
382        )
383    }
384}