Skip to main content

limma/
splines.rs

1//! Natural cubic spline basis, a port of `splines::ns(x, df, intercept=TRUE)`
2//! (R's `splines` package) and the B-spline machinery (`splineDesign` / de Boor)
3//! it needs. Used by the trended variance moderation in `eBayes` (`fitFDist`
4//! with a covariate).
5//!
6//! The downstream consumer only uses the basis through a least-squares fit, so
7//! the *exact* basis vectors do not matter — only the column space does. We
8//! therefore reproduce R's knot placement exactly (boundary knots at the data
9//! range, interior knots at type-7 quantiles) and span the same natural-cubic-
10//! spline subspace, but parameterise it with our own orthonormal null-space
11//! basis rather than R's. Projection onto a subspace is basis-independent, so
12//! the fitted values and residual sum of squares match R to floating-point
13//! precision regardless.
14//!
15//! [`NsBasis`] keeps that null-space transform so the fitted spline can be
16//! re-evaluated at new covariate points (R's `predict.ns`). A natural spline is
17//! a *unique* function — linear beyond the boundary knots — so its value at a
18//! new point is likewise basis-independent: evaluating our basis there and
19//! contracting with our fit coefficients reproduces R's prediction exactly. This
20//! drives the `notallok` branch of trended `fitFDist`, which extrapolates the
21//! trend to genes excluded from the fit.
22
23use ndarray::{s, Array2};
24
25use crate::ebayes::quantile_type7;
26use crate::linalg::qr_full_q;
27
28/// One row of the order-`ord` B-spline basis (or its `deriv`-th derivative,
29/// `deriv` in `{0,1,2}`) evaluated at `xv` on knot vector `t`. The returned
30/// vector has length `t.len() - ord` (the number of B-splines).
31///
32/// Cox–de Boor value recurrence, with the standard derivative recurrence applied
33/// once or twice. At the global right boundary (`xv == max knot`) the order-1
34/// indicator is taken as the left limit so the clamped basis evaluates to the
35/// endpoint value, matching `splines::splineDesign`.
36fn bspline_row(t: &[f64], xv: f64, ord: usize, deriv: usize) -> Vec<f64> {
37    let m = t.len();
38
39    // Order-1 (piecewise-constant) basis, right-continuous on [t_i, t_{i+1}).
40    let mut tables: Vec<Vec<f64>> = Vec::with_capacity(ord);
41    let mut n1 = vec![0.0_f64; m - 1];
42    for i in 0..(m - 1) {
43        if t[i] <= xv && xv < t[i + 1] {
44            n1[i] = 1.0;
45        }
46    }
47    // Left limit at the global right boundary knot.
48    if xv >= t[m - 1] {
49        for i in (0..(m - 1)).rev() {
50            if t[i] < t[i + 1] {
51                n1[i] = 1.0;
52                break;
53            }
54        }
55    }
56    tables.push(n1);
57
58    // Raise to order `ord` by the Cox–de Boor recurrence.
59    for o in 2..=ord {
60        let prev = &tables[o - 2];
61        let len = m - o;
62        let mut no = vec![0.0_f64; len];
63        for j in 0..len {
64            let d1 = t[j + o - 1] - t[j];
65            let a1 = if d1 != 0.0 { (xv - t[j]) / d1 } else { 0.0 };
66            let d2 = t[j + o] - t[j + 1];
67            let a2 = if d2 != 0.0 { (t[j + o] - xv) / d2 } else { 0.0 };
68            no[j] = a1 * prev[j] + a2 * prev[j + 1];
69        }
70        tables.push(no);
71    }
72
73    match deriv {
74        0 => tables[ord - 1].clone(),
75        1 => {
76            // d/dx B_{j,ord} = (ord-1) [ B_{j,ord-1}/(t_{j+ord-1}-t_j)
77            //                          - B_{j+1,ord-1}/(t_{j+ord}-t_{j+1}) ]
78            let prev = &tables[ord - 2];
79            deriv_combine(t, prev, ord)
80        }
81        2 => {
82            // First derivative of the order-(ord-1) basis, then differentiate
83            // once more with the order-`ord` weights.
84            let lower = &tables[ord - 3];
85            let d1 = deriv_combine(t, lower, ord - 1);
86            deriv_combine(t, &d1, ord)
87        }
88        _ => panic!("bspline_row: deriv must be 0, 1 or 2"),
89    }
90}
91
92/// Apply one step of the B-spline derivative recurrence at order `ord` to a
93/// vector `prev` of order-(ord-1) basis values (or derivatives). Returns a
94/// vector of length `t.len() - ord`.
95fn deriv_combine(t: &[f64], prev: &[f64], ord: usize) -> Vec<f64> {
96    let m = t.len();
97    let len = m - ord;
98    let scale = (ord - 1) as f64;
99    let mut out = vec![0.0_f64; len];
100    for j in 0..len {
101        let d1 = t[j + ord - 1] - t[j];
102        let term1 = if d1 != 0.0 { prev[j] / d1 } else { 0.0 };
103        let d2 = t[j + ord] - t[j + 1];
104        let term2 = if d2 != 0.0 { prev[j + 1] / d2 } else { 0.0 };
105        out[j] = scale * (term1 - term2);
106    }
107    out
108}
109
110/// `splines::splineDesign(knots, x, ord, derivs=deriv)`: the order-`ord` B-spline
111/// basis (or `deriv`-th derivative) at each point of `x`. Shape `x.len() x
112/// (knots.len() - ord)`.
113fn spline_design(knots: &[f64], x: &[f64], ord: usize, deriv: usize) -> Array2<f64> {
114    let nb = knots.len() - ord;
115    let mut out = Array2::<f64>::zeros((x.len(), nb));
116    for (r, &xv) in x.iter().enumerate() {
117        let row = bspline_row(knots, xv, ord, deriv);
118        for (c, val) in row.into_iter().enumerate() {
119            out[[r, c]] = val;
120        }
121    }
122    out
123}
124
125/// A fitted natural-cubic-spline basis: the clamped knot vector, boundary knots,
126/// and the null-space transform mapping the order-4 B-spline pre-basis to the
127/// natural-spline basis. Spans the same column space as `splines::ns(x, df=df,
128/// intercept=TRUE)` and can be re-evaluated at new covariate points (cf. R's
129/// `predict.ns`), including linear extrapolation beyond the boundary knots.
130pub(crate) struct NsBasis {
131    /// Augmented (clamped) knot vector: ord-fold boundary knots plus interior.
132    aknots: Vec<f64>,
133    /// Boundary knots = the fitting data range.
134    xmin: f64,
135    xmax: f64,
136    /// `(df+2) x df` transform from the order-4 B-spline pre-basis to the
137    /// natural-spline basis (trailing columns of the constraint null space).
138    nbasis: Array2<f64>,
139}
140
141impl NsBasis {
142    /// Fit the basis to `x` with `df` degrees of freedom (`df >= 2`, giving
143    /// `df - 2` interior knots). Boundary knots are the data range; interior
144    /// knots are type-7 quantiles at the evenly spaced probabilities
145    /// `i/(df-1)` for `i = 1..df-2`.
146    pub(crate) fn fit(x: &[f64], df: usize) -> NsBasis {
147        assert!(df >= 2, "NsBasis requires df >= 2");
148        let xmin = x.iter().cloned().fold(f64::INFINITY, f64::min);
149        let xmax = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
150
151        let n_iknots = df - 2;
152        let iknots: Vec<f64> = if n_iknots > 0 {
153            let probs: Vec<f64> = (1..=n_iknots)
154                .map(|i| i as f64 / (n_iknots as f64 + 1.0))
155                .collect();
156            let mut xs = x.to_vec();
157            xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
158            quantile_type7(&xs, &probs)
159        } else {
160            Vec::new()
161        };
162
163        let ord = 4usize;
164        let mut aknots: Vec<f64> = Vec::with_capacity(2 * ord + n_iknots);
165        aknots.extend(std::iter::repeat_n(xmin, ord));
166        aknots.extend_from_slice(&iknots);
167        aknots.extend(std::iter::repeat_n(xmax, ord));
168        aknots.sort_by(|a, b| a.partial_cmp(b).unwrap());
169
170        // Natural boundary constraint: second derivative zero at the boundary
171        // knots. Its null space (trailing columns of Q from the QR of its
172        // transpose) spans the natural cubic spline subspace.
173        let constraint = spline_design(&aknots, &[xmin, xmax], ord, 2); // 2 x (df+2)
174        let ct = constraint.t().to_owned(); // (df+2) x 2
175        let q = qr_full_q(&ct); // (df+2) x (df+2)
176        let nb = ct.nrows();
177        let nbasis = q.slice(s![.., 2..nb]).to_owned(); // (df+2) x df
178
179        NsBasis {
180            aknots,
181            xmin,
182            xmax,
183            nbasis,
184        }
185    }
186
187    /// Evaluate the natural-spline basis at each point of `newx`, returning a
188    /// `newx.len() x df` matrix. Points inside `[xmin, xmax]` use the clamped
189    /// B-spline basis directly; points outside are extrapolated linearly from
190    /// the value and first derivative at the nearest boundary knot, matching R's
191    /// `predict.ns`. Because the same null-space transform is reused, evaluating
192    /// at new points gives the same fitted function as R regardless of the
193    /// (basis-dependent) coefficients.
194    pub(crate) fn eval(&self, newx: &[f64]) -> Array2<f64> {
195        let ord = 4usize;
196        let npre = self.aknots.len() - ord; // df + 2
197        let mut pre = Array2::<f64>::zeros((newx.len(), npre));
198        for (r, &xv) in newx.iter().enumerate() {
199            let row = if xv < self.xmin {
200                self.extrapolate_row(self.xmin, xv)
201            } else if xv > self.xmax {
202                self.extrapolate_row(self.xmax, xv)
203            } else {
204                bspline_row(&self.aknots, xv, ord, 0)
205            };
206            for (c, val) in row.into_iter().enumerate() {
207                pre[[r, c]] = val;
208            }
209        }
210        pre.dot(&self.nbasis) // newx.len() x df
211    }
212
213    /// Linear extrapolation of the order-4 pre-basis from boundary knot `pivot`
214    /// out to `xv`: `value(pivot) + (xv - pivot) * value'(pivot)`, component-wise.
215    fn extrapolate_row(&self, pivot: f64, xv: f64) -> Vec<f64> {
216        let ord = 4usize;
217        let val = bspline_row(&self.aknots, pivot, ord, 0);
218        let der = bspline_row(&self.aknots, pivot, ord, 1);
219        let dx = xv - pivot;
220        val.iter()
221            .zip(der.iter())
222            .map(|(&v, &d)| v + dx * d)
223            .collect()
224    }
225}
226
227#[cfg(test)]
228mod tests {
229    use super::*;
230    use crate::linalg::{qr_econ, solve_upper};
231    use ndarray::Array1;
232
233    /// Natural cubic spline design at the fitting points: `NsBasis::fit` then
234    /// `eval` at `x`. Spans the same column space as `splines::ns(x, df,
235    /// intercept=TRUE)`.
236    fn ns_design(x: &[f64], df: usize) -> Array2<f64> {
237        NsBasis::fit(x, df).eval(x)
238    }
239
240    /// Project `y` onto `col(design)` and return (fitted, residual sum of
241    /// squares). Basis-independent, so it equals R's `lm.fit` up to f.p. error.
242    fn project(design: &Array2<f64>, y: &[f64]) -> (Vec<f64>, f64) {
243        let yv = Array1::from(y.to_vec());
244        let (q, r) = qr_econ(design);
245        let qty = q.t().dot(&yv);
246        let coef = solve_upper(&r, &qty);
247        let fitted = design.dot(&coef);
248        let rss: f64 = yv
249            .iter()
250            .zip(fitted.iter())
251            .map(|(a, b)| (a - b) * (a - b))
252            .sum();
253        (fitted.to_vec(), rss)
254    }
255
256    /// At the boundary knots a clamped natural-spline basis interpolates the
257    /// endpoints: the constant function is in the span, and the basis sums to a
258    /// finite, well-conditioned set (no NaNs/Infs anywhere).
259    #[test]
260    fn ns_basis_is_finite_and_spans_constants() {
261        let x: Vec<f64> = (0..50).map(|i| (i as f64) * 0.2 - 3.0).collect();
262        let d = ns_design(&x, 4);
263        assert_eq!(d.nrows(), 50);
264        assert_eq!(d.ncols(), 4);
265        assert!(d.iter().all(|v| v.is_finite()));
266        // The constant vector lies in the column space, so regressing 1 on the
267        // basis reproduces it with ~zero residual.
268        let ones = vec![1.0_f64; 50];
269        let (_fit, rss) = project(&d, &ones);
270        assert!(rss < 1e-18, "constant not in span, rss={rss}");
271    }
272
273    /// A cubic polynomial restricted between the boundary knots lies in the
274    /// natural-spline space only approximately, but a *linear* function lies in
275    /// it exactly (natural splines are linear past the boundary and a line is
276    /// its own natural extension). Regressing a line reproduces it exactly.
277    #[test]
278    fn ns_reproduces_linear_functions() {
279        let x: Vec<f64> = (0..40).map(|i| (i as f64).powf(1.3) * 0.1).collect();
280        let d = ns_design(&x, 5);
281        assert_eq!(d.ncols(), 5);
282        let y: Vec<f64> = x.iter().map(|&v| 2.5 * v - 0.7).collect();
283        let (fit, rss) = project(&d, &y);
284        assert!(rss < 1e-16, "line not reproduced, rss={rss}");
285        for (a, b) in fit.iter().zip(y.iter()) {
286            assert!((a - b).abs() < 1e-8);
287        }
288    }
289
290    /// Reference values from R:
291    /// `splineDesign(c(0,0,0,0,1,2,3,3,3,3), c(0,1.5,3), 4)` — the order-4
292    /// B-spline basis on a clamped knot vector with two interior knots,
293    /// evaluated at the left boundary, an interior point, and the right
294    /// boundary. Confirms the de Boor recurrence and the right-boundary limit.
295    #[test]
296    fn spline_design_matches_r_reference() {
297        let knots = [0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0];
298        let b = spline_design(&knots, &[0.0, 1.5, 3.0], 4, 0);
299        // R: row 1 = (1,0,0,0,0,0); row 3 = (0,0,0,0,0,1).
300        assert!((b[[0, 0]] - 1.0).abs() < 1e-12);
301        assert!(b.row(0).iter().skip(1).all(|&v| v.abs() < 1e-12));
302        assert!((b[[2, 5]] - 1.0).abs() < 1e-12);
303        assert!(b.row(2).iter().take(5).all(|&v| v.abs() < 1e-12));
304        // Each row of a B-spline basis sums to 1 (partition of unity).
305        for r in 0..3 {
306            let s: f64 = b.row(r).sum();
307            assert!((s - 1.0).abs() < 1e-12, "row {r} sums to {s}");
308        }
309        // R: splineDesign(...,1.5,...) = (0, 0.03125, 0.46875, 0.46875, 0.03125,
310        // 0) — symmetric about the midpoint 1.5 of the [0,3] knot layout.
311        assert!((b[[1, 1]] - 0.031_25).abs() < 1e-12, "{}", b[[1, 1]]);
312        assert!((b[[1, 2]] - 0.468_75).abs() < 1e-12, "{}", b[[1, 2]]);
313        assert!((b[[1, 3]] - 0.468_75).abs() < 1e-12, "{}", b[[1, 3]]);
314        assert!((b[[1, 4]] - 0.031_25).abs() < 1e-12, "{}", b[[1, 4]]);
315    }
316}