// ndarray_glm/data.rs
1//! Structs and utilities to represent input fit data.
2use crate::num::Float;
3use ndarray::{Array1, Array2, ArrayView2, Axis, concatenate, s};
4use num_traits::{FromPrimitive, One};
5use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign};
6
#[derive(Clone, Debug)]
pub struct Dataset<F>
where
    F: Float,
{
    /// The observed response value for each event/observation.
    pub y: Array1<F>,
    /// The design matrix with observations in rows and covariates in columns.
    /// After `finalize_design_matrix` this may be standardized and/or have a
    /// leading column of ones prepended for the intercept.
    pub x: Array2<F>,
    /// The offset in the linear predictor for each data point. This can be used
    /// to fix the effect of control variables.
    pub linear_offset: Option<Array1<F>>,
    /// The variance weight of each observation (a.k.a. analytic weights).
    pub weights: Option<Array1<F>>,
    /// The frequency of each observation (traditionally positive integers).
    pub freqs: Option<Array1<F>>,
    /// Tracks whether the design matrix has a constant column of 1s prepended for the intercept
    /// terms.
    pub(crate) has_intercept: bool,
    /// If not None, indicates that the design matrix has been standardized and holds the
    /// Standardizer object that performed (and can invert) the transformation.
    pub(crate) standardizer: Option<Standardizer<F>>,
}
30
31impl<F> Dataset<F>
32where
33 F: Float,
34{
35 /// Returns the linear predictors, i.e. the design matrix multiplied by the
36 /// regression parameters. Each entry in the resulting array is the linear
37 /// predictor for a given observation. If linear offsets for each
38 /// observation are provided, these are added to the linear predictors.
39 /// This is no longer a public function because it is designed to take regressors in the
40 /// standardized scale, and exposing that would be error-prone.
41 /// It should be agnostic to the question of the intercept.
42 pub(crate) fn linear_predictor_std(&self, regressors: &Array1<F>) -> Array1<F> {
43 let linear_predictor: Array1<F> = self.x.dot(regressors);
44 // Add linear offsets to the predictors if they are set
45 if let Some(lin_offset) = &self.linear_offset {
46 linear_predictor + lin_offset
47 } else {
48 linear_predictor
49 }
50 }
51
52 /// Total number of observations as given by the sum of the frequencies of observations
53 pub fn n_obs(&self) -> F {
54 match &self.freqs {
55 None => F::from(self.y.len()).unwrap(),
56 Some(f) => f.sum(),
57 }
58 }
59
60 /// Standardize the design matrix and store the standardizer object.
61 pub(crate) fn finalize_design_matrix(&mut self, transform: bool, use_intercept: bool) {
62 assert!(
63 self.standardizer.is_none(),
64 "This dataset is already marked as being standardized."
65 );
66 assert!(!self.has_intercept, "Intercept shouldn't already be set");
67 // This must be set before calling Standardizer::from_dataset
68 self.has_intercept = use_intercept;
69 if transform {
70 // NOTE: It's critical that the intercept is specified before passing to this from_dataset
71 // method, because it checks pretty much everything: weights, intercept, etc.
72 // This isn't a nice factorization, but at least this mess is all internal.
73 let standardizer = Standardizer::from_dataset(self);
74 standardizer.transform_inplace(&mut self.x);
75 self.standardizer = Some(standardizer);
76 }
77 // Pad the ones after standardization for simplicity
78 if use_intercept {
79 self.x = one_pad(self.x.view());
80 }
81 }
82
83 /// Returns the effective sample size corrected for the design effect. This exposes the sum of
84 /// the squares of the variance weights.
85 pub fn n_eff(&self) -> F {
86 match &self.weights {
87 None => self.n_obs(),
88 Some(w) => {
89 let v1 = self.freq_sum(w);
90 let w2 = w * w;
91 let v2 = self.freq_sum(&w2);
92 v1 * v1 / v2
93 }
94 }
95 }
96
97 /// Multiply the input by the frequency weights
98 pub(crate) fn apply_freq_weights(&self, rhs: Array1<F>) -> Array1<F> {
99 match &self.freqs {
100 None => rhs,
101 Some(f) => f * rhs,
102 }
103 }
104
105 /// multiply the input vector element-wise by the weights, if they exist
106 pub(crate) fn apply_total_weights(&self, rhs: Array1<F>) -> Array1<F> {
107 self.apply_freq_weights(self.apply_var_weights(rhs))
108 }
109
110 pub(crate) fn apply_var_weights(&self, rhs: Array1<F>) -> Array1<F> {
111 match &self.weights {
112 None => rhs,
113 Some(w) => w * rhs,
114 }
115 }
116
117 /// Sum over the input array using the frequencies (and not the variance weights) as weights.
118 /// This is a useful operation because the frequency weights fundamentally impact the sum
119 /// operator and nothing else.
120 pub(crate) fn freq_sum(&self, rhs: &Array1<F>) -> F {
121 self.apply_freq_weights(rhs.clone()).sum()
122 }
123
124 pub(crate) fn get_variance_weights(&self) -> Array1<F> {
125 match &self.weights {
126 Some(w) => w.clone(),
127 None => Array1::<F>::ones(self.y.len()),
128 }
129 }
130
131 /// Returns the sum of the weights, or the number of observations if the weights are all equal
132 /// to 1.
133 pub(crate) fn sum_weights(&self) -> F {
134 match &self.weights {
135 None => self.n_obs(),
136 Some(w) => self.freq_sum(w),
137 }
138 }
139
140 /// Return the weighted sum of the RHS, where both frequency and variance weights are used.
141 pub(crate) fn weighted_sum(&self, rhs: &Array1<F>) -> F {
142 self.freq_sum(&self.apply_var_weights(rhs.clone()))
143 }
144
145 /// Returns the weighted transpose of the feature data
146 /// TODO: Consider making this weighted-conjugate operation separate from the x data, so it can
147 /// be applied in multiple places (e.g. x_conj_ext())
148 // TODO: Consider cacheing this as it's used in several places in fit.rs.
149 // Also consider if any of the other modified-X helpers should be.
150 pub(crate) fn x_conj(&self) -> Array2<F> {
151 let xt = self.x.t().to_owned();
152 let xt = match &self.freqs {
153 None => xt,
154 Some(f) => xt * f,
155 };
156 match &self.weights {
157 None => xt,
158 Some(w) => xt * w,
159 }
160 }
161
162 /// Returns the external data matrix, scaled back to the original level. The intercept term is
163 /// included so that this can be used in some fit statistic matrix multiplications.
164 pub(crate) fn x_ext(&self) -> Array2<F> {
165 let x_orig = self.x_orig();
166 if self.has_intercept {
167 // The 0th column should just be ones. We could also just use an array of 1s, but we'll
168 // pass the values themselves just in case some other value was ever used.
169 concatenate![Axis(1), self.x.slice(s![.., ..1]), x_orig]
170 } else {
171 x_orig
172 }
173 }
174
175 /// Returns the data matrix in the original state it was input, without an intercept column.
176 /// There may be floating-point differences due to the back-transformation.
177 /// NOTE: This does some unnecessary clones if there is no standardization. Perhaps we want to
178 /// maintain a reference to the original matrix and use copy-on-write to optionally a
179 /// standardized version.
180 pub(crate) fn x_orig(&self) -> Array2<F> {
181 let x = if self.has_intercept {
182 self.x.slice(s![.., 1..]).to_owned()
183 } else {
184 self.x.clone()
185 };
186 match &self.standardizer {
187 Some(std) => std.inverse_transform(x),
188 None => x,
189 }
190 }
191
192 /// Return the weighted transpose of the feature data in the original un-standardized scale
193 pub(crate) fn x_conj_ext(&self) -> Array2<F> {
194 let x_ext = self.x_ext().t().to_owned();
195 let xt = match &self.freqs {
196 None => x_ext,
197 Some(f) => x_ext * f,
198 };
199 match &self.weights {
200 None => xt,
201 Some(w) => xt * w,
202 }
203 }
204
205 /// Transform the parameters from standardized space back into the external one.
206 pub(crate) fn inverse_transform_beta(&self, beta: Array1<F>) -> Array1<F> {
207 match &self.standardizer {
208 Some(std) => {
209 if self.has_intercept {
210 std.inverse_transform_coefficients(beta)
211 } else {
212 std.inverse_transform_coefficients_no_int(beta)
213 }
214 }
215 None => beta,
216 }
217 }
218
219 /// Transform external parameters into internal standardized ones.
220 pub(crate) fn transform_beta(&self, beta: Array1<F>) -> Array1<F> {
221 match &self.standardizer {
222 Some(std) => {
223 if self.has_intercept {
224 std.transform_coefficients(beta)
225 } else {
226 std.transform_coefficients_no_int(beta)
227 }
228 }
229 None => beta,
230 }
231 }
232
233 /// Transform an internal Fisher matrix d^2/d\beta'^2 to an external representation
234 /// d^2/d\beta^2.
235 pub(crate) fn inverse_transform_fisher(&self, fisher: Array2<F>) -> Array2<F> {
236 match &self.standardizer {
237 Some(std) => {
238 if self.has_intercept {
239 std.inverse_transform_fisher(fisher)
240 } else {
241 std.inverse_transform_fisher_no_int(fisher)
242 }
243 }
244 None => fisher,
245 }
246 }
247
248 /// Transform a score (gradient) vector from the internal standardized parameter basis to the
249 /// external one: `score_ext = J^T score_int` where `J = d(beta_std)/d(beta_ext)`.
250 pub(crate) fn inverse_transform_score(&self, score: Array1<F>) -> Array1<F> {
251 match &self.standardizer {
252 Some(std) => {
253 if self.has_intercept {
254 std.inverse_transform_score(score)
255 } else {
256 std.inverse_transform_score_no_int(score)
257 }
258 }
259 None => score,
260 }
261 }
262}
263
/// Stores the per-column means and sample standard deviations learned from a design matrix, and
/// can apply the same transformation to new data.
///
/// This is useful when fitting a regularized model: standardize the training
/// data with [`Dataset::finalize_design_matrix`], then apply the same
/// transformation to held-out inputs via [`Standardizer::transform_inplace`] inside of
/// `Fit::predict` (see the fit module). Alternatively, use
/// [`Standardizer::inverse_transform_coefficients`] to convert the fitted
/// coefficients back to the original scale, however this will result only in the linear predictor
/// and one would have to apply the inverse link function in order to extract predicted
/// $`y`$-values.
///
/// # Degrees of freedom
///
/// Standard deviations use an effective-sample-size bias correction
/// `n_eff / (n_eff - 1)`, which reduces to ddof=1 (sample std) for unit
/// weights, consistent with unbiased estimation from a training set.
///
/// # Zero-variance columns
///
/// Columns with zero variance are not scaled (i.e. scaled by 1).
#[derive(Clone, Debug)]
pub(crate) struct Standardizer<F> {
    /// Per-column means (all zero when the dataset has no intercept).
    pub shifts: Array1<F>,
    /// Per-column standard deviations. Constant (zero-variance) columns fall
    /// back to a scale of 1, so the scales are never zero.
    pub scales: Array1<F>,
}
292
impl<F> Standardizer<F>
where
    F: Float + FromPrimitive + std::ops::DivAssign,
{
    /// Compute per-column means and standard deviations from `data`, using the `x` values
    /// as well as the weights, if present.
    ///
    /// The means are given by the (possibly weighted) average of the values. The variances
    /// are the weighted average of the squared deviations from the weighted mean, times a bias
    /// term $`1/(1-1/n_eff)`$ (reducing to the ddof=1 correction for unit weights).
    ///
    /// For empty data (0 rows) the mean defaults to 0 and the std to 1.
    /// For a single row the mean is that row's value and the std defaults to 1
    /// (sample std is undefined with one observation).
    ///
    /// NOTE: `data.has_intercept` must already be set when this is called; without an intercept
    /// the shifts are zeroed so no centering is ever applied.
    fn from_dataset(data: &Dataset<F>) -> Self {
        let x = &data.x;
        let (n, p) = (x.nrows(), x.ncols());
        // Per-observation total weights (variance * frequency), defaulting to ones.
        let weights: Array1<F> = data.apply_total_weights(Array1::<F>::ones(n));
        let sum_weights: F = weights.sum();
        // Change the shape to a column so it broadcasts across the Array2 columns.
        let weights: Array2<F> = weights.insert_axis(Axis(1));
        let n_eff = data.n_eff();
        let means = if n == 0 {
            Array1::<F>::zeros(p)
        } else {
            let x_w: Array2<F> = &weights * x;
            x_w.sum_axis(Axis(0)) / sum_weights
        };

        let vars: Array1<F> = if n <= 1 {
            // Sample variance is undefined with <= 1 row; fall back to unit scales.
            Array1::<F>::ones(p)
        } else {
            // The bias correction below divides by n_eff - 1.
            assert!(n_eff > F::one());
            let dx = x - means.clone();
            let dx2: Array2<F> = dx.mapv_into(|d| d * d);
            let dx2_w = weights * dx2;
            let vars = dx2_w.sum_axis(Axis(0)) / sum_weights;
            let bias = n_eff / (n_eff - F::one());
            vars * bias
        };
        // For columns with zero variance, don't scale (use a scale of 1).
        let scales = vars
            .mapv(|v| if v > F::zero() { v } else { F::one() })
            .sqrt();
        // We need the actual means to compute the scales, but if we're not using the intercept,
        // the shifts should be returned to zero.
        let shifts = if data.has_intercept {
            means
        } else {
            Array1::<F>::zeros(p)
        };

        Self { shifts, scales }
    }

    /// Apply the fitted standardization to `x`, in-place on a mutable array.
    ///
    /// Each column has its training mean subtracted and is divided by the training standard
    /// deviation. Columns whose training variance was zero are only adjusted by the mean (their
    /// stored scale is 1). The parameter should be zero in this case anyway (or perhaps undefined
    /// with no regularization), so this prevents predictions being impacted by numerical
    /// imprecision.
    fn transform_inplace(&self, x: &mut Array2<F>) {
        x.sub_assign(&self.shifts);
        x.div_assign(&self.scales);
    }

    /// Apply the inverse transformation (scale then shift) to get back the original data.
    /// This method is crate-public so that the exact LOO computation can appropriately account
    /// for regularization.
    pub(crate) fn inverse_transform(&self, x: Array2<F>) -> Array2<F> {
        (x * &self.scales) + &self.shifts
    }

    /// Convert coefficient estimates from the standardized scale back to the
    /// original predictor scale, producing coefficients that apply directly to
    /// un-standardized data.
    ///
    /// `beta` must have length equal to the rank $`K`$, which is 1 longer than the means and
    /// stds. For a no-intercept coefficient vector, prepend it with a zero before passing, as the
    /// transformed coefficients will have an intercept term regardless.
    ///
    /// The back-transformation is:
    ///
    /// ```math
    /// \tilde\beta_j = \beta_j / \sigma_j \qquad j = 1,\ldots,K-1
    /// ```
    ///
    /// ```math
    /// \tilde\beta_0 = \beta_0 - \sum_j \beta_j\,\mu_j / \sigma_j
    /// ```
    ///
    /// Predictors with zero variance use a scale of 1, so their coefficients
    /// pass through unscaled (the fit cannot identify those coefficients).
    ///
    /// # Panics
    ///
    /// Panics if the arrays are the wrong size, like all array operations.
    fn inverse_transform_coefficients(&self, mut beta: Array1<F>) -> Array1<F> {
        // The scales are the std devs, but fall back to no scaling for a std of zero.
        // We'd expect columns with zero variance to result in a beta of zero anyway, but we'll
        // try to handle it precisely in case some assumption is broken.
        // Scale the coefficients.
        beta.slice_mut(s![1..]).div_assign(&self.scales);
        // Adjust the intercept term using the already-scaled slopes.
        let intercept_adjust: F = (self.shifts.clone() * beta.slice(s![1..])).sum();
        beta[0] -= intercept_adjust;

        beta
    }

    /// Do the inverse transform on the coefficients in the no-intercept context.
    /// Here scaling is applied, but not shifting.
    fn inverse_transform_coefficients_no_int(&self, beta: Array1<F>) -> Array1<F> {
        beta / &self.scales
    }

    /// Transform the coefficients from external back to internal (standardized). This should not
    /// be a public function as the user should be able to engage only with the external scale.
    /// This is the exact inverse of `inverse_transform_coefficients`, applied in reverse order:
    /// adjust the intercept first, then scale the slopes.
    fn transform_coefficients(&self, mut beta: Array1<F>) -> Array1<F> {
        let intercept_adjust = (self.shifts.clone() * beta.slice(s![1..])).sum();
        beta[0] += intercept_adjust;
        beta.slice_mut(s![1..]).mul_assign(&self.scales);
        beta
    }

    /// Transform the no-intercept coefficients from external back to internal (standardized).
    /// In the no-intercept case the Jacobian is diagonal, so only scaling is applied.
    fn transform_coefficients_no_int(&self, beta: Array1<F>) -> Array1<F> {
        beta * &self.scales
    }

    /// Transform the Fisher information matrix from the internal standardized representation to
    /// the external one. The Fisher information is the 2nd derivative of the log-likelihood with
    /// respect to the parameters, so the transformation needs to multiply by the Jacobian and its
    /// transpose on each end. This Jacobian is given by $`\frac{\partial \beta'_i}{\partial
    /// \beta_j}`$.
    fn inverse_transform_fisher(&self, mut fisher: Array2<F>) -> Array2<F> {
        let scales = &self.scales;
        // The original (0,0) element, needed below after the matrix is mutated.
        let f00 = fisher[[0, 0]];
        // The order of these in-place multiplications is important. We're trying to reduce clones
        // by doing it in this order.
        // Outer product sigma sigma^T for the covariate block.
        let block_mult: Array2<F> = scales.clone().insert_axis(Axis(1)) * scales.t();
        // l_kk -> sigma * l_kk * sigma
        fisher.slice_mut(s![1.., 1..]).mul_assign(&block_mult);
        // l_k -> sigma * l_k (both the first column and the first row)
        fisher.slice_mut(s![1.., 0]).mul_assign(scales);
        fisher.slice_mut(s![0, 1..]).mul_assign(scales);
        // f00 shows up scaled by mu in the rest of the terms.
        let f00_mu = self.shifts.clone() * f00;
        // Add the sigma_kk * l_k * mu_k^T and the other side to the block. Don't assume that
        // fisher is symmetric, just in case.
        // We're using the fact that the vector components of fisher have already been scaled by
        // sigma.
        // Each of the 3 terms is an outer product of a column vector and a row vector.
        // Note: .t() on a 1D array is a no-op in ndarray, so outer products require insert_axis.
        let row0 = fisher.slice(s![0, 1..]).to_owned().insert_axis(Axis(0));
        let col0 = fisher.slice(s![1.., 0]).to_owned().insert_axis(Axis(1));
        let mu_col = self.shifts.clone().insert_axis(Axis(1));
        let mu_row = self.shifts.clone().insert_axis(Axis(0));
        let block_terms: Array2<F> =
            &mu_col * &row0 + &col0 * &mu_row + &mu_col * &f00_mu.clone().insert_axis(Axis(0));
        fisher.slice_mut(s![1.., 1..]).add_assign(&block_terms);
        // Now we add the f00*mu terms to the vector portions.
        fisher.slice_mut(s![1.., 0]).add_assign(&f00_mu);
        fisher.slice_mut(s![0, 1..]).add_assign(&f00_mu);

        fisher
    }

    /// Transform the Fisher information matrix from the internal beta' representation to the
    /// external beta representation, when no shifting is used due to the lack of an intercept.
    /// The matrix part of the Jacobian is diagonal, so we don't need to do full matrix
    /// multiplications.
    fn inverse_transform_fisher_no_int(&self, fisher: Array2<F>) -> Array2<F> {
        // Get a row vector of the scales
        let std = self.scales.to_owned().insert_axis(Axis(0));
        // The diagonal std matrix multiplies the fisher block from both sides, which should be
        // equivalent to this element-wise multiplication
        fisher * std.t() * std
    }

    /// Transform a gradient from the internal standardized basis to external: `score_ext = J^T score_int`.
    ///
    /// With intercept:
    /// - `score_ext[0] = score_int[0]`
    /// - `score_ext[j] = mu_j * score_int[0] + sigma_j * score_int[j]` for j > 0
    fn inverse_transform_score(&self, mut score: Array1<F>) -> Array1<F> {
        // Capture score_int[0] before mutating; the intercept entry is unchanged.
        let s0 = score[0];
        score.slice_mut(s![1..]).mul_assign(&self.scales);
        score
            .slice_mut(s![1..])
            .add_assign(&(self.shifts.clone() * s0));
        score
    }

    /// Transform a gradient from internal to external when there is no intercept.
    /// `J = D_sigma`, so `score_ext = D_sigma * score_int`.
    /// This ends up being the same computation as transform_coefficients_no_int(), but since it's
    /// not true of the intercept version, we'll keep this function separate for now to have
    /// consistent organization.
    fn inverse_transform_score_no_int(&self, score: Array1<F>) -> Array1<F> {
        score * &self.scales
    }
}
498
499/// Prepend the input with a column of ones.
500/// Used to incorporate a constant intercept term in a regression.
501pub(crate) fn one_pad<T>(data: ArrayView2<T>) -> Array2<T>
502where
503 T: Copy + One,
504{
505 // create the ones column
506 let ones: Array2<T> = Array2::ones((data.nrows(), 1));
507 // This should be guaranteed to succeed since we are manually specifying the dimension
508 concatenate![Axis(1), ones, data]
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    impl<F: Float> Standardizer<F> {
        /// This method isn't used in the crate since we have transform_inplace() to avoid copying,
        /// but it's a useful utility in these tests.
        fn transform(&self, mut x: Array2<F>) -> Array2<F> {
            self.transform_inplace(&mut x);
            x
        }
    }

    /// Build a dataset with dummy responses and finalize it with both
    /// standardization and an intercept.
    fn make_dataset(x: Array2<f64>) -> Dataset<f64> {
        let n = x.nrows();
        let mut ds = Dataset {
            y: Array1::zeros(n),
            x,
            linear_offset: None,
            weights: None,
            freqs: None,
            has_intercept: false,
            standardizer: None,
        };
        ds.finalize_design_matrix(true, true);
        ds
    }

    /// Extract the standardizer fit on `x` (with intercept enabled).
    fn get_standardizer(x: Array2<f64>) -> Standardizer<f64> {
        let ds = make_dataset(x);
        ds.standardizer.unwrap()
    }

    // Basic fit: check means and sample stds (ddof=1).
    #[test]
    fn fit_means_and_stds() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.shifts, array![2.0, 6.0], epsilon = 1e-12);
        // sample std: col0 = 1.0, col1 = 2.0
        assert_abs_diff_eq!(s.scales, array![1.0, 2.0], epsilon = 1e-12);
    }

    // Verify ddof=1, not ddof=0, by a case where they differ.
    #[test]
    fn fit_uses_sample_std() {
        // Two observations: ddof=1 gives sqrt(2), ddof=0 gives 1.
        let x = array![[1.0_f64], [3.0]];
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.scales[0], 2.0_f64.sqrt(), epsilon = 1e-12);
    }

    // transform produces zero-mean, unit-variance columns.
    #[test]
    fn transform_standardized() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x.clone());
        let x_std = s.transform(x);
        assert_abs_diff_eq!(
            x_std,
            array![[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]],
            epsilon = 1e-12
        );
    }

    // from_dataset then transform is idempotent (same result called twice).
    #[test]
    fn fit_transform_consistent() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x.clone());
        let x_std = s.transform(x.clone());
        assert_abs_diff_eq!(s.transform(x), x_std, epsilon = 1e-12);
    }

    // n=0: means default to 0, stds default to 1.
    #[test]
    fn fit_empty() {
        let x = Array2::<f64>::zeros((0, 3));
        let s = get_standardizer(x);
        assert_abs_diff_eq!(s.shifts, array![0.0, 0.0, 0.0], epsilon = 1e-12);
        assert_abs_diff_eq!(s.scales, array![1.0, 1.0, 1.0], epsilon = 1e-12);
    }

    // n=1: mean is the single row's value, std defaults to 1.
    #[test]
    fn fit_single_row() {
        let x = array![[3.0_f64, 7.0]];
        let s = get_standardizer(x.clone());
        assert_abs_diff_eq!(s.shifts, array![3.0, 7.0], epsilon = 1e-12);
        assert_abs_diff_eq!(s.scales, array![1.0, 1.0], epsilon = 1e-12);
        // In-sample transform should give zeros.
        let x_std = s.transform(x);
        assert_abs_diff_eq!(x_std, array![[0.0, 0.0]], epsilon = 1e-12);
    }

    // Constant column: zero variance falls back to a stored scale of 1, so the
    // transform only shifts it.
    #[test]
    fn transform_constant_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x.clone());
        assert_abs_diff_eq!(s.scales[1], 1.0, epsilon = 1e-12);
        let x_std = s.transform(x);
        // non-constant column standardizes normally; constant column is zeroed by the shift
        assert_abs_diff_eq!(
            x_std.column(1).to_owned(),
            array![0.0, 0.0, 0.0],
            epsilon = 1e-12
        );
    }

    // inverse_transform_coefficients with intercept: slopes scale by 1/std,
    // intercept absorbs the centering correction.
    #[test]
    fn inverse_transform_with_intercept() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.5_f64, 2.0, 3.0]; // intercept, slope0, slope1
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: 3.0 / 2.0 = 1.5
        // intercept: 0.5 - (2.0*2.0/1.0 + 3.0*6.0/2.0) = 0.5 - (4.0 + 9.0) = -12.5
        assert_abs_diff_eq!(beta_raw, array![-12.5_f64, 2.0, 1.5], epsilon = 1e-12);
    }

    // inverse_transform_coefficients without intercept: prepend zero for the
    // intercept slot as documented, then verify only the slope elements.
    #[test]
    fn inverse_transform_no_intercept() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.0_f64, 2.0, 3.0]; // zero intercept prepended
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: 3.0 / 2.0 = 1.5
        assert_abs_diff_eq!(
            beta_raw.slice(s![1..]).to_owned(),
            array![2.0_f64, 1.5],
            epsilon = 1e-12
        );
    }

    // inverse_transform_coefficients with a zero-variance column: its stored
    // scale falls back to 1, so that coefficient passes through unscaled while
    // its mean still contributes to the intercept adjustment. Note that this is
    // unrealistic in standard applications, since a column of zero variance
    // should lead to a beta of about zero.
    #[test]
    fn inverse_transform_zero_std_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x); // means=[2,5], scales=[1,1] (zero-variance fallback)
        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_raw = s.inverse_transform_coefficients(beta_std);
        // slope0: 2.0 / 1.0 = 2.0
        // slope1: 3.0 / 1.0 = 3.0 (the zero-variance column is not scaled)
        // intercept: 0.5 - (2.0*2.0/1.0) - (3.0*5.0/1.0) = 0.5 - 4.0 - 15.0 = -18.5
        assert_abs_diff_eq!(beta_raw, array![-18.5_f64, 2.0, 3.0], epsilon = 1e-12);
    }

    // inverse_transform_coefficients panics for wrong-length input.
    #[test]
    #[should_panic]
    fn inverse_transform_bad_length() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // p=2, expects length 3
        let bad = array![1.0_f64, 2.0, 3.0, 4.0]; // length 4
        let _ = s.inverse_transform_coefficients(bad);
    }

    // Round-trip: standardized → external → standardized recovers the original.
    #[test]
    fn transform_inverse_transform_roundtrip() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_ext = s.inverse_transform_coefficients(beta_std.clone());
        let recovered = s.transform_coefficients(beta_ext);
        assert_abs_diff_eq!(recovered, beta_std, epsilon = 1e-12);
    }

    // Round-trip: external → standardized → external recovers the original.
    #[test]
    fn inverse_transform_transform_roundtrip() {
        let x = array![[1.0_f64, 4.0], [2.0, 6.0], [3.0, 8.0]];
        let s = get_standardizer(x); // means=[2,6], stds=[1,2]
        let beta_ext = array![-12.5_f64, 2.0, 1.5];
        let beta_std = s.transform_coefficients(beta_ext.clone());
        let recovered = s.inverse_transform_coefficients(beta_std);
        assert_abs_diff_eq!(recovered, beta_ext, epsilon = 1e-12);
    }

    // Round-trips hold even when a column has zero variance (scale falls back to 1).
    #[test]
    fn roundtrip_zero_std_column() {
        let x = array![[1.0_f64, 5.0], [2.0, 5.0], [3.0, 5.0]];
        let s = get_standardizer(x); // means=[2,5], scales=[1,1] (zero-variance fallback)

        let beta_std = array![0.5_f64, 2.0, 3.0];
        let beta_ext = s.inverse_transform_coefficients(beta_std.clone());
        let recovered_std = s.transform_coefficients(beta_ext);
        assert_abs_diff_eq!(recovered_std, beta_std, epsilon = 1e-12);

        let beta_ext2 = array![-3.5_f64, 2.0, 3.0];
        let beta_std2 = s.transform_coefficients(beta_ext2.clone());
        let recovered_ext = s.inverse_transform_coefficients(beta_std2);
        assert_abs_diff_eq!(recovered_ext, beta_ext2, epsilon = 1e-12);
    }
}