1//! Structs and utilities to represent input fit data.
2use crate::num::Float;
3use ndarray::{Array1, Array2, ArrayView2, Axis, concatenate, s};
4use num_traits::{FromPrimitive, One};
5use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign};
6
#[derive(Clone, Debug)]
pub struct Dataset<F>
where
    F: Float,
{
    /// The observed response data, one entry per event/observation.
    pub y: Array1<F>,
    /// The design matrix with observations in rows and covariates in columns.
    /// After `finalize_design_matrix` this may be standardized and/or have a
    /// constant column of ones prepended for the intercept.
    pub x: Array2<F>,
    /// The offset in the linear predictor for each data point. This can be used
    /// to fix the effect of control variables.
    pub linear_offset: Option<Array1<F>>,
    /// The variance weight of each observation (a.k.a. analytic weights).
    pub weights: Option<Array1<F>>,
    /// The frequency of each observation (traditionally positive integers).
    pub freqs: Option<Array1<F>>,
    /// Tracks whether the design matrix has a constant column of 1s prepended for the intercept
    /// term.
    pub(crate) has_intercept: bool,
    /// If not None, indicates that the design matrix has been standardized and holds the
    /// Standardizer object used for the transformation.
    pub(crate) standardizer: Option<Standardizer<F>>,
}
30
31impl<F> Dataset<F>
32where
33    F: Float,
34{
35    /// Returns the linear predictors, i.e. the design matrix multiplied by the
36    /// regression parameters. Each entry in the resulting array is the linear
37    /// predictor for a given observation. If linear offsets for each
38    /// observation are provided, these are added to the linear predictors.
39    /// This is no longer a public function because it is designed to take regressors in the
40    /// standardized scale, and exposing that would be error-prone.
41    /// It should be agnostic to the question of the intercept.
42    pub(crate) fn linear_predictor_std(&self, regressors: &Array1<F>) -> Array1<F> {
43        let linear_predictor: Array1<F> = self.x.dot(regressors);
44        // Add linear offsets to the predictors if they are set
45        if let Some(lin_offset) = &self.linear_offset {
46            linear_predictor + lin_offset
47        } else {
48            linear_predictor
49        }
50    }
51
52    /// Total number of observations as given by the sum of the frequencies of observations
53    pub fn n_obs(&self) -> F {
54        match &self.freqs {
55            None => F::from(self.y.len()).unwrap(),
56            Some(f) => f.sum(),
57        }
58    }
59
60    /// Standardize the design matrix and store the standardizer object.
61    pub(crate) fn finalize_design_matrix(&mut self, transform: bool, use_intercept: bool) {
62        assert!(
63            self.standardizer.is_none(),
64            "This dataset is already marked as being standardized."
65        );
66        assert!(!self.has_intercept, "Intercept shouldn't already be set");
67        // This must be set before calling Standardizer::from_dataset
68        self.has_intercept = use_intercept;
69        if transform {
70            // NOTE: It's critical that the intercept is specified before passing to this from_dataset
71            // method, because it checks pretty much everything: weights, intercept, etc.
72            // This isn't a nice factorization, but at least this mess is all internal.
73            let standardizer = Standardizer::from_dataset(self);
74            standardizer.transform_inplace(&mut self.x);
75            self.standardizer = Some(standardizer);
76        }
77        // Pad the ones after standardization for simplicity
78        if use_intercept {
79            self.x = one_pad(self.x.view());
80        }
81    }
82
83    /// Returns the effective sample size corrected for the design effect. This exposes the sum of
84    /// the squares of the variance weights.
85    pub fn n_eff(&self) -> F {
86        match &self.weights {
87            None => self.n_obs(),
88            Some(w) => {
89                let v1 = self.freq_sum(w);
90                let w2 = w * w;
91                let v2 = self.freq_sum(&w2);
92                v1 * v1 / v2
93            }
94        }
95    }
96
97    /// Multiply the input by the frequency weights
98    pub(crate) fn apply_freq_weights(&self, rhs: Array1<F>) -> Array1<F> {
99        match &self.freqs {
100            None => rhs,
101            Some(f) => f * rhs,
102        }
103    }
104
105    /// multiply the input vector element-wise by the weights, if they exist
106    pub(crate) fn apply_total_weights(&self, rhs: Array1<F>) -> Array1<F> {
107        self.apply_freq_weights(self.apply_var_weights(rhs))
108    }
109
110    pub(crate) fn apply_var_weights(&self, rhs: Array1<F>) -> Array1<F> {
111        match &self.weights {
112            None => rhs,
113            Some(w) => w * rhs,
114        }
115    }
116
117    /// Sum over the input array using the frequencies (and not the variance weights) as weights.
118    /// This is a useful operation because the frequency weights fundamentally impact the sum
119    /// operator and nothing else.
120    pub(crate) fn freq_sum(&self, rhs: &Array1<F>) -> F {
121        self.apply_freq_weights(rhs.clone()).sum()
122    }
123
124    pub(crate) fn get_variance_weights(&self) -> Array1<F> {
125        match &self.weights {
126            Some(w) => w.clone(),
127            None => Array1::<F>::ones(self.y.len()),
128        }
129    }
130
131    /// Returns the sum of the weights, or the number of observations if the weights are all equal
132    /// to 1.
133    pub(crate) fn sum_weights(&self) -> F {
134        match &self.weights {
135            None => self.n_obs(),
136            Some(w) => self.freq_sum(w),
137        }
138    }
139
140    /// Return the weighted sum of the RHS, where both frequency and variance weights are used.
141    pub(crate) fn weighted_sum(&self, rhs: &Array1<F>) -> F {
142        self.freq_sum(&self.apply_var_weights(rhs.clone()))
143    }
144
145    /// Returns the weighted transpose of the feature data
146    /// TODO: Consider making this weighted-conjugate operation separate from the x data, so it can
147    /// be applied in multiple places (e.g. x_conj_ext())
148    // TODO: Consider cacheing this as it's used in several places in fit.rs.
149    // Also consider if any of the other modified-X helpers should be.
150    pub(crate) fn x_conj(&self) -> Array2<F> {
151        let xt = self.x.t().to_owned();
152        let xt = match &self.freqs {
153            None => xt,
154            Some(f) => xt * f,
155        };
156        match &self.weights {
157            None => xt,
158            Some(w) => xt * w,
159        }
160    }
161
162    /// Returns the external data matrix, scaled back to the original level. The intercept term is
163    /// included so that this can be used in some fit statistic matrix multiplications.
164    pub(crate) fn x_ext(&self) -> Array2<F> {
165        let x_orig = self.x_orig();
166        if self.has_intercept {
167            // The 0th column should just be ones. We could also just use an array of 1s, but we'll
168            // pass the values themselves just in case some other value was ever used.
169            concatenate![Axis(1), self.x.slice(s![.., ..1]), x_orig]
170        } else {
171            x_orig
172        }
173    }
174
175    /// Returns the data matrix in the original state it was input, without an intercept column.
176    /// There may be floating-point differences due to the back-transformation.
177    /// NOTE: This does some unnecessary clones if there is no standardization. Perhaps we want to
178    /// maintain a reference to the original matrix and use copy-on-write to optionally a
179    /// standardized version.
180    pub(crate) fn x_orig(&self) -> Array2<F> {
181        let x = if self.has_intercept {
182            self.x.slice(s![.., 1..]).to_owned()
183        } else {
184            self.x.clone()
185        };
186        match &self.standardizer {
187            Some(std) => std.inverse_transform(x),
188            None => x,
189        }
190    }
191
192    /// Return the weighted transpose of the feature data in the original un-standardized scale
193    pub(crate) fn x_conj_ext(&self) -> Array2<F> {
194        let x_ext = self.x_ext().t().to_owned();
195        let xt = match &self.freqs {
196            None => x_ext,
197            Some(f) => x_ext * f,
198        };
199        match &self.weights {
200            None => xt,
201            Some(w) => xt * w,
202        }
203    }
204
205    /// Transform the parameters from standardized space back into the external one.
206    pub(crate) fn inverse_transform_beta(&self, beta: Array1<F>) -> Array1<F> {
207        match &self.standardizer {
208            Some(std) => {
209                if self.has_intercept {
210                    std.inverse_transform_coefficients(beta)
211                } else {
212                    std.inverse_transform_coefficients_no_int(beta)
213                }
214            }
215            None => beta,
216        }
217    }
218
219    /// Transform external parameters into internal standardized ones.
220    pub(crate) fn transform_beta(&self, beta: Array1<F>) -> Array1<F> {
221        match &self.standardizer {
222            Some(std) => {
223                if self.has_intercept {
224                    std.transform_coefficients(beta)
225                } else {
226                    std.transform_coefficients_no_int(beta)
227                }
228            }
229            None => beta,
230        }
231    }
232
233    /// Transform an internal Fisher matrix d^2/d\beta'^2 to an external representation
234    /// d^2/d\beta^2.
235    pub(crate) fn inverse_transform_fisher(&self, fisher: Array2<F>) -> Array2<F> {
236        match &self.standardizer {
237            Some(std) => {
238                if self.has_intercept {
239                    std.inverse_transform_fisher(fisher)
240                } else {
241                    std.inverse_transform_fisher_no_int(fisher)
242                }
243            }
244            None => fisher,
245        }
246    }
247
248    /// Transform a score (gradient) vector from the internal standardized parameter basis to the
249    /// external one: `score_ext = J^T score_int` where `J = d(beta_std)/d(beta_ext)`.
250    pub(crate) fn inverse_transform_score(&self, score: Array1<F>) -> Array1<F> {
251        match &self.standardizer {
252            Some(std) => {
253                if self.has_intercept {
254                    std.inverse_transform_score(score)
255                } else {
256                    std.inverse_transform_score_no_int(score)
257                }
258            }
259            None => score,
260        }
261    }
262}
263
/// Stores the per-column means and sample standard deviations learned from a design matrix, and
/// can apply the same transformation to new data.
///
/// This is useful when fitting a regularized model: standardize the training
/// data (see `Dataset::finalize_design_matrix`), then apply the same
/// transformation to held-out inputs via `Standardizer::transform_inplace`
/// inside of [`Fit::predict`](crate::fit::Fit::predict). Alternatively, use
/// `Standardizer::inverse_transform_coefficients` to convert the fitted
/// coefficients back to the original scale, however this will result only in the linear predictor
/// and one would have to apply the inverse link function in order to extract predicted
/// $`y`$-values.
///
/// # Degrees of freedom
///
/// Standard deviations are computed with ddof=1 (sample std), consistent with
/// unbiased estimation from a training set.
///
/// # Zero-variance columns
///
/// Columns with zero variance are not scaled; their scale is stored as 1.
#[derive(Clone, Debug)]
pub(crate) struct Standardizer<F> {
    /// Per-column means. All zeros when the dataset has no intercept (see
    /// `from_dataset`).
    pub shifts: Array1<F>,
    /// Per-column sample standard deviations (ddof=1). Constant columns are
    /// stored with a scale of 1 so the transforms never divide by zero.
    pub scales: Array1<F>,
}
292
impl<F> Standardizer<F>
where
    F: Float + FromPrimitive + std::ops::DivAssign,
{
    /// Compute per-column means and sample standard deviations from `data`, using the `x` values
    /// as well as the weights, if present.
    ///
    /// The means are given by the (possibly weighted) average of the values. The variances
    /// are the weighted average of the squared deviations from the weighted mean, times a bias
    /// correction term $`1/(1-1/n_eff)`$.
    ///
    /// For empty data (0 rows) the mean defaults to 0 and the std to 1.
    /// For a single row the mean is that row's value and the std defaults to 1
    /// (sample std is undefined with one observation).
    fn from_dataset(data: &Dataset<F>) -> Self {
        let x = &data.x;
        let (n, p) = (x.nrows(), x.ncols());
        // Combined (frequency * variance) weight per observation; all ones if unweighted.
        let weights: Array1<F> = data.apply_total_weights(Array1::<F>::ones(n));
        let sum_weights: F = weights.sum();
        // Change the shape to broadcast to Array2 weighting
        let weights: Array2<F> = weights.insert_axis(Axis(1));
        let n_eff = data.n_eff();
        let means = if n == 0 {
            Array1::<F>::zeros(p)
        } else {
            let x_w: Array2<F> = &weights * x;
            x_w.sum_axis(Axis(0)) / sum_weights
        };

        let vars: Array1<F> = if n <= 1 {
            Array1::<F>::ones(p)
        } else {
            // The bias correction below divides by (n_eff - 1), so n_eff must exceed 1.
            assert!(n_eff > F::one());
            let dx = x - means.clone();
            let dx2: Array2<F> = dx.mapv_into(|d| d * d);
            let dx2_w = weights * dx2;
            let vars = dx2_w.sum_axis(Axis(0)) / sum_weights;
            let bias = n_eff / (n_eff - F::one());
            vars * bias
        };
        // For columns with zero variance, don't scale (store a scale of 1).
        let scales = vars
            .mapv(|v| if v > F::zero() { v } else { F::one() })
            .sqrt();
        // We need the actual means to compute the scales, but if we're not using the intercept,
        // the shifts should be returned to zero.
        let shifts = if data.has_intercept {
            means
        } else {
            Array1::<F>::zeros(p)
        };

        Self { shifts, scales }
    }

    /// Apply the fitted standardization to `x`, in-place on a mutable array.
    ///
    /// Each column has its training mean subtracted and is divided by the training standard
    /// deviation. Columns whose training std was zero are only adjusted by the mean (their stored
    /// scale is 1). The parameter should be zero in this case anyway (or perhaps undefined with no
    /// regularization), so this prevents predictions being impacted from numerical imprecisions.
    fn transform_inplace(&self, x: &mut Array2<F>) {
        x.sub_assign(&self.shifts);
        x.div_assign(&self.scales);
    }

    /// Apply the inverse transformation to get back the original data.
    /// This method is crate-public so that the exact LOO computation can appropriately account for
    /// regularization.
    pub(crate) fn inverse_transform(&self, x: Array2<F>) -> Array2<F> {
        (x * &self.scales) + &self.shifts
    }

    /// Convert coefficient estimates from the standardized scale back to the
    /// original predictor scale, yielding coefficients that apply directly to
    /// the un-standardized design matrix.
    ///
    /// `beta` must have length equal to the rank $`K`$ which is 1 longer than the means and stds.
    /// If a non-intercept coefficient vector, prepend it with a zero before passing it as the
    /// transformed coefficients will have an intercept term regardless.
    ///
    /// The back-transformation is:
    ///
    /// ```math
    /// \tilde\beta_j = \beta_j / \sigma_j \qquad j = 1,\ldots,K-1
    /// ```
    ///
    /// ```math
    /// \tilde\beta_0 = \beta_0 - \sum_j \beta_j\,\mu_j / \sigma_j
    /// ```
    ///
    /// Predictors with zero standard deviation have their scale stored as 1, so
    /// those coefficients pass through unscaled (the fit cannot identify them,
    /// and they are expected to be near zero anyway).
    ///
    /// # Panics
    ///
    /// Panics if the arrays are the wrong size, like all array operations.
    fn inverse_transform_coefficients(&self, mut beta: Array1<F>) -> Array1<F> {
        // The scales are the std devs, but fallback to no scaling if there is a std of zero.
        // We'd expect columns with zero variance to result in a beta of zero anyway, but we'll try
        // to handle it precisely anyway in case some assumption is broken.
        // Scale the coefficients.
        beta.slice_mut(s![1..]).div_assign(&self.scales);
        // Adjust the intercept term
        let intercept_adjust: F = (self.shifts.clone() * beta.slice(s![1..])).sum();
        beta[0] -= intercept_adjust;

        beta
    }

    /// Do the inverse transform on the coefficients in the no-intercept context.
    /// Here scaling is applied, but not shifting.
    fn inverse_transform_coefficients_no_int(&self, beta: Array1<F>) -> Array1<F> {
        beta / &self.scales
    }

    /// Transform the coefficients from external back to internal (standardized). This should not
    /// be a public function as the user should be able to engage only with the external scale.
    fn transform_coefficients(&self, mut beta: Array1<F>) -> Array1<F> {
        // Exact inverse of inverse_transform_coefficients: shift first, then scale.
        let intercept_adjust = (self.shifts.clone() * beta.slice(s![1..])).sum();
        beta[0] += intercept_adjust;
        beta.slice_mut(s![1..]).mul_assign(&self.scales);
        beta
    }

    /// Transform the no-intercept coefficients from external back to internal (standardized).
    /// In the no-intercept case only the scaling applies; there is no shift to undo.
    fn transform_coefficients_no_int(&self, beta: Array1<F>) -> Array1<F> {
        beta * &self.scales
    }

    /// Transform the Fisher information matrix from the internal standardized representation to
    /// the external one. The fisher information is the 2nd derivative of the log-likelihood with
    /// respect to the parameters, so the transformation needs to multiply by the Jacobian and it's
    /// transpose on each end. This Jacobian is given by $`\frac{\partial \beta'_i}{\partial
    /// \beta_j}`$.
    fn inverse_transform_fisher(&self, mut fisher: Array2<F>) -> Array2<F> {
        let scales = &self.scales;
        let f00 = fisher[[0, 0]];
        // The order of these in-place multiplications is important. We're trying to reduce clones
        // by doing it in this order.
        let block_mult: Array2<F> = scales.clone().insert_axis(Axis(1)) * scales.t();
        // l_kk -> sigma * l_kk * sigma
        fisher.slice_mut(s![1.., 1..]).mul_assign(&block_mult);
        // l_k -> sigma * l_k
        fisher.slice_mut(s![1.., 0]).mul_assign(scales);
        fisher.slice_mut(s![0, 1..]).mul_assign(scales);
        // f00 shows up scaled by mu in the rest of the terms.
        let f00_mu = self.shifts.clone() * f00;
        // Add the sigma_kk * l_k * mu_k^T and the other side to the block. Don't assume that
        // fisher is symmetric, just in case.
        // We're using the fact that the vector components of fisher have already been scaled by
        // sigma.
        // Each of the 3 terms is an outer product of a column vector and a row vector.
        // Note: .t() on a 1D array is a no-op in ndarray, so outer products require insert_axis.
        let row0 = fisher.slice(s![0, 1..]).to_owned().insert_axis(Axis(0));
        let col0 = fisher.slice(s![1.., 0]).to_owned().insert_axis(Axis(1));
        let mu_col = self.shifts.clone().insert_axis(Axis(1));
        let mu_row = self.shifts.clone().insert_axis(Axis(0));
        let block_terms: Array2<F> =
            &mu_col * &row0 + &col0 * &mu_row + &mu_col * &f00_mu.clone().insert_axis(Axis(0));
        fisher.slice_mut(s![1.., 1..]).add_assign(&block_terms);
        // Now we add the f0*mu terms to the vector portions
        fisher.slice_mut(s![1.., 0]).add_assign(&f00_mu);
        fisher.slice_mut(s![0, 1..]).add_assign(&f00_mu);

        fisher
    }

    /// Transform the Fisher information matrix from the internal beta' representation to the
    /// external beta representation, when no shifting is used due to the lack of an intercept.
    /// The matrix part of the Jacobian is diagonal, so we don't need to do full matrix
    /// multiplications.
    fn inverse_transform_fisher_no_int(&self, fisher: Array2<F>) -> Array2<F> {
        // Get a row vector of the scales
        let std = self.scales.to_owned().insert_axis(Axis(0));
        // The diagonal std matrix multiplies the fisher block from both sides, which should be
        // equivalent to this element-wise multiplication
        fisher * std.t() * std
    }

    /// Transform a gradient from the internal standardized basis to external: `score_ext = J^T score_int`.
    ///
    /// With intercept:
    /// - `score_ext[0] = score_int[0]`
    /// - `score_ext[j] = mu_j * score_int[0] + sigma_j * score_int[j]` for j > 0
    fn inverse_transform_score(&self, mut score: Array1<F>) -> Array1<F> {
        let s0 = score[0];
        score.slice_mut(s![1..]).mul_assign(&self.scales);
        score
            .slice_mut(s![1..])
            .add_assign(&(self.shifts.clone() * s0));
        score
    }

    /// Transform a gradient from internal to external when there is no intercept.
    /// `J = D_sigma`, so `score_ext = D_sigma * score_int`.
    /// This ends up being the same computation as transform_coefficients_no_int(), but since it's
    /// not true of the intercept version, we'll keep this function separate for now to have
    /// consistent organization.
    fn inverse_transform_score_no_int(&self, score: Array1<F>) -> Array1<F> {
        score * &self.scales
    }
}
498
499/// Prepend the input with a column of ones.
500/// Used to incorporate a constant intercept term in a regression.
501pub(crate) fn one_pad<T>(data: ArrayView2<T>) -> Array2<T>
502where
503    T: Copy + One,
504{
505    // create the ones column
506    let ones: Array2<T> = Array2::ones((data.nrows(), 1));
507    // This should be guaranteed to succeed since we are manually specifying the dimension
508    concatenate![Axis(1), ones, data]
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    impl<F: Float> Standardizer<F> {
        /// This method isn't used in the crate since we have transform_inplace() to avoid copying,
        /// but it's a useful utility in these tests.
        fn transform(&self, mut x: Array2<F>) -> Array2<F> {
            self.transform_inplace(&mut x);
            x
        }
    }

    /// Build a minimal unweighted dataset around `x` and finalize it with
    /// standardization and an intercept.
    fn make_dataset(x: Array2<f64>) -> Dataset<f64> {
        let n = x.nrows();
        let mut ds = Dataset {
            y: Array1::zeros(n),
            x,
            linear_offset: None,
            weights: None,
            freqs: None,
            has_intercept: false,
            standardizer: None,
        };
        ds.finalize_design_matrix(true, true);
        ds
    }

    /// Shorthand to extract just the fitted Standardizer for `x`.
    fn get_standardizer(x: Array2<f64>) -> Standardizer<f64> {
        let ds = make_dataset(x);
        ds.standardizer.unwrap()
    }

    // Basic fit: check means and sample stds (ddof=1).
    #[test]
    fn fit_means_and_stds() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.shifts, array![2.0, 6.0], epsilon = 1e-12);
        // sample std: col0 = 1.0, col1 = 2.0
        assert_abs_diff_eq!(s.scales, array![1.0, 2.0], epsilon = 1e-12);
    }

    // Verify ddof=1, not ddof=0, by a case where they differ.
    #[test]
    fn fit_uses_sample_std() {
        // Two observations: ddof=1 gives sqrt(2), ddof=0 gives 1.
        let x = array![[1.0_f64], [3.0]];
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.scales[0], 2.0_f64.sqrt(), epsilon = 1e-12);
    }

    // transform produces zero-mean, unit-variance columns.
    #[test]
    fn transform_standardized() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x.clone());
        let x_std = s.transform(x);
        assert_abs_diff_eq!(
            x_std,
            array![[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]],
            epsilon = 1e-12
        );
    }

    // from_dataset then transform is idempotent (same result called twice).
    #[test]
    fn fit_transform_consistent() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x.clone());
        let x_std = s.transform(x.clone());
        assert_abs_diff_eq!(s.transform(x), x_std, epsilon = 1e-12);
    }

    // n=0: means default to 0, stds default to 1.
    #[test]
    fn fit_empty() {
        let x = Array2::<f64>::zeros((0, 3));
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.shifts, array![0.0, 0.0, 0.0], epsilon = 1e-12);
        assert_abs_diff_eq!(s.scales, array![1.0, 1.0, 1.0], epsilon = 1e-12);
    }

    // n=1: mean is the single row's value, std defaults to 1.
    #[test]
    fn fit_single_row() {
        let x = array![[3.0_f64, 7.0]];
        let s = get_standardizer(x.clone());
        assert_abs_diff_eq!(s.shifts, array![3.0, 7.0], epsilon = 1e-12);
        assert_abs_diff_eq!(s.scales, array![1.0, 1.0], epsilon = 1e-12);
        // In-sample transform should give zeros.
        let x_std = s.transform(x);
        assert_abs_diff_eq!(x_std, array![[0.0, 0.0]], epsilon = 1e-12);
    }

    // Constant column: the stored scale falls back to 1, so the transform only
    // shifts it (no division).
    #[test]
    fn transform_constant_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x.clone());
        assert_abs_diff_eq!(s.scales[1], 1.0, epsilon = 1e-12);
        let x_std = s.transform(x);
        // non-constant column standardizes normally; constant column is zeroed by the shift
        assert_abs_diff_eq!(
            x_std.column(1).to_owned(),
            array![0.0, 0.0, 0.0],
            epsilon = 1e-12
        );
    }

    // inverse_transform_coefficients with intercept: slopes scale by 1/std,
    // intercept absorbs the centering correction.
    #[test]
    fn inverse_transform_with_intercept() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.5_f64, 2.0, 3.0]; // intercept, slope0, slope1
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: 3.0 / 2.0 = 1.5
        // intercept: 0.5 - (2.0*2.0/1.0 + 3.0*6.0/2.0) = 0.5 - (4.0 + 9.0) = -12.5
        assert_abs_diff_eq!(beta_raw, array![-12.5_f64, 2.0, 1.5], epsilon = 1e-12);
    }

    // inverse_transform_coefficients without intercept: prepend zero for the
    // intercept slot as documented, then verify only the slope elements.
    #[test]
    fn inverse_transform_no_intercept() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.0_f64, 2.0, 3.0]; // zero intercept prepended
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: 3.0 / 2.0 = 1.5
        assert_abs_diff_eq!(
            beta_raw.slice(s![1..]).to_owned(),
            array![2.0_f64, 1.5],
            epsilon = 1e-12
        );
    }

    // inverse_transform_coefficients with a zero-std column: its scale is stored
    // as 1, so that coefficient passes through unchanged, though it still
    // contributes to the intercept adjustment. Note that this is unrealistic in
    // standard applications, since a column of zero variance should lead to a beta of about zero.
    #[test]
    fn inverse_transform_zero_std_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x); // means=[2,5], stds=[1,0] (scale stored as 1)
        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: zero std → scale of 1 → unchanged at 3.0
        // intercept: 0.5 - (2.0*2.0/1.0) - (3.0*5.0/1.0) = 0.5 - 4.0 - 15.0 = -18.5
        // The last element has zero std, so we don't scale it at all.
        assert_abs_diff_eq!(beta_raw, array![-18.5_f64, 2.0, 3.0], epsilon = 1e-12);
    }

    // inverse_transform_coefficients panics on wrong-length input (array
    // operations panic on shape mismatch; there is no Result to return).
    #[test]
    #[should_panic]
    fn inverse_transform_bad_length() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // p=2, expects length 3
        let bad = array![1.0_f64, 2.0, 3.0, 4.0]; // length 4
        let _ = s.inverse_transform_coefficients(bad);
    }

    // Round-trip: standardized → external → standardized recovers the original.
    #[test]
    fn transform_inverse_transform_roundtrip() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_ext = s.inverse_transform_coefficients(beta_std.clone());
        let recovered = s.transform_coefficients(beta_ext);
        assert_abs_diff_eq!(recovered, beta_std, epsilon = 1e-12);
    }

    // Round-trip: external → standardized → external recovers the original.
    #[test]
    fn inverse_transform_transform_roundtrip() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_ext = array![-12.5_f64, 2.0, 1.5];
        let beta_std = s.transform_coefficients(beta_ext.clone());
        let recovered = s.inverse_transform_coefficients(beta_std);
        assert_abs_diff_eq!(recovered, beta_ext, epsilon = 1e-12);
    }

    // Round-trips hold even when a column has zero variance (scale stored as 1).
    #[test]
    fn roundtrip_zero_std_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x); // means=[2,5], stds=[1,0] (scale stored as 1)

        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_ext = s.inverse_transform_coefficients(beta_std.clone());
        let recovered_std = s.transform_coefficients(beta_ext);
        assert_abs_diff_eq!(recovered_std, beta_std, epsilon = 1e-12);

        let beta_ext2 = array![-3.5_f64, 2.0, 3.0];
        let beta_std2 = s.transform_coefficients(beta_ext2.clone());
        let recovered_ext = s.inverse_transform_coefficients(beta_std2);
        assert_abs_diff_eq!(recovered_ext, beta_ext2, epsilon = 1e-12);
    }
}