limma/splines.rs
1//! Natural cubic spline basis, a port of `splines::ns(x, df, intercept=TRUE)`
2//! (R's `splines` package) and the B-spline machinery (`splineDesign` / de Boor)
3//! it needs. Used by the trended variance moderation in `eBayes` (`fitFDist`
4//! with a covariate).
5//!
6//! The downstream consumer only uses the basis through a least-squares fit, so
7//! the *exact* basis vectors do not matter — only the column space does. We
8//! therefore reproduce R's knot placement exactly (boundary knots at the data
9//! range, interior knots at type-7 quantiles) and span the same natural-cubic-
10//! spline subspace, but parameterise it with our own orthonormal null-space
11//! basis rather than R's. Projection onto a subspace is basis-independent, so
12//! the fitted values and residual sum of squares match R to floating-point
13//! precision regardless.
14//!
15//! [`NsBasis`] keeps that null-space transform so the fitted spline can be
16//! re-evaluated at new covariate points (R's `predict.ns`). A natural spline is
17//! a *unique* function — linear beyond the boundary knots — so its value at a
18//! new point is likewise basis-independent: evaluating our basis there and
19//! contracting with our fit coefficients reproduces R's prediction exactly. This
20//! drives the `notallok` branch of trended `fitFDist`, which extrapolates the
21//! trend to genes excluded from the fit.
22
23use ndarray::{s, Array2};
24
25use crate::ebayes::quantile_type7;
26use crate::linalg::qr_full_q;
27
28/// One row of the order-`ord` B-spline basis (or its `deriv`-th derivative,
29/// `deriv` in `{0,1,2}`) evaluated at `xv` on knot vector `t`. The returned
30/// vector has length `t.len() - ord` (the number of B-splines).
31///
32/// Cox–de Boor value recurrence, with the standard derivative recurrence applied
33/// once or twice. At the global right boundary (`xv == max knot`) the order-1
34/// indicator is taken as the left limit so the clamped basis evaluates to the
35/// endpoint value, matching `splines::splineDesign`.
36fn bspline_row(t: &[f64], xv: f64, ord: usize, deriv: usize) -> Vec<f64> {
37 let m = t.len();
38
39 // Order-1 (piecewise-constant) basis, right-continuous on [t_i, t_{i+1}).
40 let mut tables: Vec<Vec<f64>> = Vec::with_capacity(ord);
41 let mut n1 = vec![0.0_f64; m - 1];
42 for i in 0..(m - 1) {
43 if t[i] <= xv && xv < t[i + 1] {
44 n1[i] = 1.0;
45 }
46 }
47 // Left limit at the global right boundary knot.
48 if xv >= t[m - 1] {
49 for i in (0..(m - 1)).rev() {
50 if t[i] < t[i + 1] {
51 n1[i] = 1.0;
52 break;
53 }
54 }
55 }
56 tables.push(n1);
57
58 // Raise to order `ord` by the Cox–de Boor recurrence.
59 for o in 2..=ord {
60 let prev = &tables[o - 2];
61 let len = m - o;
62 let mut no = vec![0.0_f64; len];
63 for j in 0..len {
64 let d1 = t[j + o - 1] - t[j];
65 let a1 = if d1 != 0.0 { (xv - t[j]) / d1 } else { 0.0 };
66 let d2 = t[j + o] - t[j + 1];
67 let a2 = if d2 != 0.0 { (t[j + o] - xv) / d2 } else { 0.0 };
68 no[j] = a1 * prev[j] + a2 * prev[j + 1];
69 }
70 tables.push(no);
71 }
72
73 match deriv {
74 0 => tables[ord - 1].clone(),
75 1 => {
76 // d/dx B_{j,ord} = (ord-1) [ B_{j,ord-1}/(t_{j+ord-1}-t_j)
77 // - B_{j+1,ord-1}/(t_{j+ord}-t_{j+1}) ]
78 let prev = &tables[ord - 2];
79 deriv_combine(t, prev, ord)
80 }
81 2 => {
82 // First derivative of the order-(ord-1) basis, then differentiate
83 // once more with the order-`ord` weights.
84 let lower = &tables[ord - 3];
85 let d1 = deriv_combine(t, lower, ord - 1);
86 deriv_combine(t, &d1, ord)
87 }
88 _ => panic!("bspline_row: deriv must be 0, 1 or 2"),
89 }
90}
91
92/// Apply one step of the B-spline derivative recurrence at order `ord` to a
93/// vector `prev` of order-(ord-1) basis values (or derivatives). Returns a
94/// vector of length `t.len() - ord`.
95fn deriv_combine(t: &[f64], prev: &[f64], ord: usize) -> Vec<f64> {
96 let m = t.len();
97 let len = m - ord;
98 let scale = (ord - 1) as f64;
99 let mut out = vec![0.0_f64; len];
100 for j in 0..len {
101 let d1 = t[j + ord - 1] - t[j];
102 let term1 = if d1 != 0.0 { prev[j] / d1 } else { 0.0 };
103 let d2 = t[j + ord] - t[j + 1];
104 let term2 = if d2 != 0.0 { prev[j + 1] / d2 } else { 0.0 };
105 out[j] = scale * (term1 - term2);
106 }
107 out
108}
109
110/// `splines::splineDesign(knots, x, ord, derivs=deriv)`: the order-`ord` B-spline
111/// basis (or `deriv`-th derivative) at each point of `x`. Shape `x.len() x
112/// (knots.len() - ord)`.
113fn spline_design(knots: &[f64], x: &[f64], ord: usize, deriv: usize) -> Array2<f64> {
114 let nb = knots.len() - ord;
115 let mut out = Array2::<f64>::zeros((x.len(), nb));
116 for (r, &xv) in x.iter().enumerate() {
117 let row = bspline_row(knots, xv, ord, deriv);
118 for (c, val) in row.into_iter().enumerate() {
119 out[[r, c]] = val;
120 }
121 }
122 out
123}
124
125/// A fitted natural-cubic-spline basis: the clamped knot vector, boundary knots,
126/// and the null-space transform mapping the order-4 B-spline pre-basis to the
127/// natural-spline basis. Spans the same column space as `splines::ns(x, df=df,
128/// intercept=TRUE)` and can be re-evaluated at new covariate points (cf. R's
129/// `predict.ns`), including linear extrapolation beyond the boundary knots.
130pub(crate) struct NsBasis {
131 /// Augmented (clamped) knot vector: ord-fold boundary knots plus interior.
132 aknots: Vec<f64>,
133 /// Boundary knots = the fitting data range.
134 xmin: f64,
135 xmax: f64,
136 /// `(df+2) x df` transform from the order-4 B-spline pre-basis to the
137 /// natural-spline basis (trailing columns of the constraint null space).
138 nbasis: Array2<f64>,
139}
140
141impl NsBasis {
142 /// Fit the basis to `x` with `df` degrees of freedom (`df >= 2`, giving
143 /// `df - 2` interior knots). Boundary knots are the data range; interior
144 /// knots are type-7 quantiles at the evenly spaced probabilities
145 /// `i/(df-1)` for `i = 1..df-2`.
146 pub(crate) fn fit(x: &[f64], df: usize) -> NsBasis {
147 assert!(df >= 2, "NsBasis requires df >= 2");
148 let xmin = x.iter().cloned().fold(f64::INFINITY, f64::min);
149 let xmax = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
150
151 let n_iknots = df - 2;
152 let iknots: Vec<f64> = if n_iknots > 0 {
153 let probs: Vec<f64> = (1..=n_iknots)
154 .map(|i| i as f64 / (n_iknots as f64 + 1.0))
155 .collect();
156 let mut xs = x.to_vec();
157 xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
158 quantile_type7(&xs, &probs)
159 } else {
160 Vec::new()
161 };
162
163 let ord = 4usize;
164 let mut aknots: Vec<f64> = Vec::with_capacity(2 * ord + n_iknots);
165 aknots.extend(std::iter::repeat_n(xmin, ord));
166 aknots.extend_from_slice(&iknots);
167 aknots.extend(std::iter::repeat_n(xmax, ord));
168 aknots.sort_by(|a, b| a.partial_cmp(b).unwrap());
169
170 // Natural boundary constraint: second derivative zero at the boundary
171 // knots. Its null space (trailing columns of Q from the QR of its
172 // transpose) spans the natural cubic spline subspace.
173 let constraint = spline_design(&aknots, &[xmin, xmax], ord, 2); // 2 x (df+2)
174 let ct = constraint.t().to_owned(); // (df+2) x 2
175 let q = qr_full_q(&ct); // (df+2) x (df+2)
176 let nb = ct.nrows();
177 let nbasis = q.slice(s![.., 2..nb]).to_owned(); // (df+2) x df
178
179 NsBasis {
180 aknots,
181 xmin,
182 xmax,
183 nbasis,
184 }
185 }
186
187 /// Evaluate the natural-spline basis at each point of `newx`, returning a
188 /// `newx.len() x df` matrix. Points inside `[xmin, xmax]` use the clamped
189 /// B-spline basis directly; points outside are extrapolated linearly from
190 /// the value and first derivative at the nearest boundary knot, matching R's
191 /// `predict.ns`. Because the same null-space transform is reused, evaluating
192 /// at new points gives the same fitted function as R regardless of the
193 /// (basis-dependent) coefficients.
194 pub(crate) fn eval(&self, newx: &[f64]) -> Array2<f64> {
195 let ord = 4usize;
196 let npre = self.aknots.len() - ord; // df + 2
197 let mut pre = Array2::<f64>::zeros((newx.len(), npre));
198 for (r, &xv) in newx.iter().enumerate() {
199 let row = if xv < self.xmin {
200 self.extrapolate_row(self.xmin, xv)
201 } else if xv > self.xmax {
202 self.extrapolate_row(self.xmax, xv)
203 } else {
204 bspline_row(&self.aknots, xv, ord, 0)
205 };
206 for (c, val) in row.into_iter().enumerate() {
207 pre[[r, c]] = val;
208 }
209 }
210 pre.dot(&self.nbasis) // newx.len() x df
211 }
212
213 /// Linear extrapolation of the order-4 pre-basis from boundary knot `pivot`
214 /// out to `xv`: `value(pivot) + (xv - pivot) * value'(pivot)`, component-wise.
215 fn extrapolate_row(&self, pivot: f64, xv: f64) -> Vec<f64> {
216 let ord = 4usize;
217 let val = bspline_row(&self.aknots, pivot, ord, 0);
218 let der = bspline_row(&self.aknots, pivot, ord, 1);
219 let dx = xv - pivot;
220 val.iter()
221 .zip(der.iter())
222 .map(|(&v, &d)| v + dx * d)
223 .collect()
224 }
225}
226
227#[cfg(test)]
228mod tests {
229 use super::*;
230 use crate::linalg::{qr_econ, solve_upper};
231 use ndarray::Array1;
232
233 /// Natural cubic spline design at the fitting points: `NsBasis::fit` then
234 /// `eval` at `x`. Spans the same column space as `splines::ns(x, df,
235 /// intercept=TRUE)`.
236 fn ns_design(x: &[f64], df: usize) -> Array2<f64> {
237 NsBasis::fit(x, df).eval(x)
238 }
239
240 /// Project `y` onto `col(design)` and return (fitted, residual sum of
241 /// squares). Basis-independent, so it equals R's `lm.fit` up to f.p. error.
242 fn project(design: &Array2<f64>, y: &[f64]) -> (Vec<f64>, f64) {
243 let yv = Array1::from(y.to_vec());
244 let (q, r) = qr_econ(design);
245 let qty = q.t().dot(&yv);
246 let coef = solve_upper(&r, &qty);
247 let fitted = design.dot(&coef);
248 let rss: f64 = yv
249 .iter()
250 .zip(fitted.iter())
251 .map(|(a, b)| (a - b) * (a - b))
252 .sum();
253 (fitted.to_vec(), rss)
254 }
255
256 /// At the boundary knots a clamped natural-spline basis interpolates the
257 /// endpoints: the constant function is in the span, and the basis sums to a
258 /// finite, well-conditioned set (no NaNs/Infs anywhere).
259 #[test]
260 fn ns_basis_is_finite_and_spans_constants() {
261 let x: Vec<f64> = (0..50).map(|i| (i as f64) * 0.2 - 3.0).collect();
262 let d = ns_design(&x, 4);
263 assert_eq!(d.nrows(), 50);
264 assert_eq!(d.ncols(), 4);
265 assert!(d.iter().all(|v| v.is_finite()));
266 // The constant vector lies in the column space, so regressing 1 on the
267 // basis reproduces it with ~zero residual.
268 let ones = vec![1.0_f64; 50];
269 let (_fit, rss) = project(&d, &ones);
270 assert!(rss < 1e-18, "constant not in span, rss={rss}");
271 }
272
273 /// A cubic polynomial restricted between the boundary knots lies in the
274 /// natural-spline space only approximately, but a *linear* function lies in
275 /// it exactly (natural splines are linear past the boundary and a line is
276 /// its own natural extension). Regressing a line reproduces it exactly.
277 #[test]
278 fn ns_reproduces_linear_functions() {
279 let x: Vec<f64> = (0..40).map(|i| (i as f64).powf(1.3) * 0.1).collect();
280 let d = ns_design(&x, 5);
281 assert_eq!(d.ncols(), 5);
282 let y: Vec<f64> = x.iter().map(|&v| 2.5 * v - 0.7).collect();
283 let (fit, rss) = project(&d, &y);
284 assert!(rss < 1e-16, "line not reproduced, rss={rss}");
285 for (a, b) in fit.iter().zip(y.iter()) {
286 assert!((a - b).abs() < 1e-8);
287 }
288 }
289
290 /// Reference values from R:
291 /// `splineDesign(c(0,0,0,0,1,2,3,3,3,3), c(0,1.5,3), 4)` — the order-4
292 /// B-spline basis on a clamped knot vector with two interior knots,
293 /// evaluated at the left boundary, an interior point, and the right
294 /// boundary. Confirms the de Boor recurrence and the right-boundary limit.
295 #[test]
296 fn spline_design_matches_r_reference() {
297 let knots = [0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0];
298 let b = spline_design(&knots, &[0.0, 1.5, 3.0], 4, 0);
299 // R: row 1 = (1,0,0,0,0,0); row 3 = (0,0,0,0,0,1).
300 assert!((b[[0, 0]] - 1.0).abs() < 1e-12);
301 assert!(b.row(0).iter().skip(1).all(|&v| v.abs() < 1e-12));
302 assert!((b[[2, 5]] - 1.0).abs() < 1e-12);
303 assert!(b.row(2).iter().take(5).all(|&v| v.abs() < 1e-12));
304 // Each row of a B-spline basis sums to 1 (partition of unity).
305 for r in 0..3 {
306 let s: f64 = b.row(r).sum();
307 assert!((s - 1.0).abs() < 1e-12, "row {r} sums to {s}");
308 }
309 // R: splineDesign(...,1.5,...) = (0, 0.03125, 0.46875, 0.46875, 0.03125,
310 // 0) — symmetric about the midpoint 1.5 of the [0,3] knot layout.
311 assert!((b[[1, 1]] - 0.031_25).abs() < 1e-12, "{}", b[[1, 1]]);
312 assert!((b[[1, 2]] - 0.468_75).abs() < 1e-12, "{}", b[[1, 2]]);
313 assert!((b[[1, 3]] - 0.468_75).abs() < 1e-12, "{}", b[[1, 3]]);
314 assert!((b[[1, 4]] - 0.031_25).abs() < 1e-12, "{}", b[[1, 4]]);
315 }
316}