1use anyhow::{bail, Result};
30use ndarray::{Array1, Array2};
31
32use crate::dups::{kron_rows, unwrapdups};
33use crate::fit::{new_fit, MArrayLM};
34use crate::linalg::{cholesky_upper, qr_econ, solve_upper, solve_upper_transpose, xtx_inv_from_r};
35
36fn nanmean<'a, I: IntoIterator<Item = &'a f64>>(it: I) -> f64 {
38 let mut sum = 0.0;
39 let mut cnt = 0usize;
40 for &v in it {
41 if v.is_finite() {
42 sum += v;
43 cnt += 1;
44 }
45 }
46 if cnt > 0 {
47 sum / cnt as f64
48 } else {
49 f64::NAN
50 }
51}
52
53fn gls_amean(exprs: &Array2<f64>, ndups: usize, spacing: usize) -> Array1<f64> {
56 let nspots = exprs.nrows();
57 let spotmean: Vec<f64> = (0..nspots).map(|i| nanmean(exprs.row(i))).collect();
58 if ndups <= 1 {
59 return Array1::from(spotmean);
60 }
61 let col = Array2::from_shape_vec((nspots, 1), spotmean).expect("column shape");
62 let uw = unwrapdups(&col, ndups, spacing.max(1));
63 (0..uw.nrows()).map(|g| nanmean(uw.row(g))).collect()
64}
65
66fn backsolve_t_cols(u: &Array2<f64>, b: &Array2<f64>) -> Array2<f64> {
68 let (n, k) = b.dim();
69 let mut out = Array2::<f64>::zeros((n, k));
70 for j in 0..k {
71 let sol = solve_upper_transpose(u, &b.column(j).to_owned());
72 out.column_mut(j).assign(&sol);
73 }
74 out
75}
76
77#[allow(clippy::too_many_arguments)]
85pub fn gls_series(
86 exprs: &Array2<f64>,
87 design: &Array2<f64>,
88 ndups: usize,
89 spacing: usize,
90 block: Option<&[i64]>,
91 correlation: f64,
92 weights: Option<&Array2<f64>>,
93 gene_names: Vec<String>,
94 coef_names: Vec<String>,
95) -> Result<MArrayLM> {
96 let narrays0 = exprs.ncols();
97 if design.nrows() != narrays0 {
98 bail!(
99 "design rows ({}) must match number of arrays ({})",
100 design.nrows(),
101 narrays0
102 );
103 }
104 if correlation.abs() >= 1.0 {
105 bail!("correlation is 1 or -1, so the model is degenerate");
106 }
107 let nbeta = design.ncols();
108
109 let amean = gls_amean(exprs, ndups, spacing);
111
112 let mut m = exprs.clone();
114 let mut wmat: Option<Array2<f64>> = None;
115 if let Some(w) = weights {
116 if w.dim() != exprs.dim() {
117 bail!("weights dimensions must match exprs");
118 }
119 let mut wc = w.clone();
120 for i in 0..wc.nrows() {
121 for j in 0..wc.ncols() {
122 if wc[[i, j]].is_nan() {
123 wc[[i, j]] = 0.0;
124 }
125 if wc[[i, j]] < 1e-15 {
126 m[[i, j]] = f64::NAN;
127 wc[[i, j]] = f64::NAN;
128 }
129 }
130 }
131 wmat = Some(wc);
132 }
133
134 let (cormatrix, mwork, wwork, dwork) = match block {
137 None => {
138 let nd = if ndups < 2 { 1 } else { ndups };
139 let corr = if ndups < 2 { 0.0 } else { correlation };
140 let sp = spacing.max(1);
141 let mu = unwrapdups(&m, nd, sp);
142 let wu = wmat.as_ref().map(|w| unwrapdups(w, nd, sp));
143 let du = kron_rows(design, nd);
144 let n = du.nrows();
145 let mut cor = Array2::<f64>::zeros((n, n));
146 for i in 0..n {
147 for j in 0..n {
148 if i / nd == j / nd {
149 cor[[i, j]] = corr;
150 }
151 }
152 cor[[i, i]] = 1.0;
153 }
154 (cor, mu, wu, du)
155 }
156 Some(b) => {
157 if b.len() != narrays0 {
158 bail!("length of block does not match number of arrays");
159 }
160 let n = narrays0;
161 let mut cor = Array2::<f64>::zeros((n, n));
162 for i in 0..n {
163 for j in 0..n {
164 if b[i] == b[j] {
165 cor[[i, j]] = correlation;
166 }
167 }
168 cor[[i, i]] = 1.0;
169 }
170 (cor, m, wmat, design.clone())
171 }
172 };
173
174 let ngenes = mwork.nrows();
175 let n = mwork.ncols();
176
177 let all_finite = mwork.iter().all(|v| v.is_finite());
178 let no_probe_wts = all_finite && wwork.is_none();
179
180 let mut coefficients = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
181 let mut stdev_unscaled = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
182 let mut sigma = Array1::<f64>::from_elem(ngenes, f64::NAN);
183 let mut df_residual = Array1::<f64>::zeros(ngenes);
184
185 let cholv_full = cholesky_upper(&cormatrix);
188 let xstar_full = backsolve_t_cols(&cholv_full, &dwork);
189 let (qf, rf) = qr_econ(&xstar_full);
190 let cov_coefficients = xtx_inv_from_r(&rf);
191
192 if no_probe_wts {
193 let df = (n - nbeta) as f64;
195 let stdev_row: Vec<f64> = (0..nbeta)
196 .map(|j| cov_coefficients[[j, j]].sqrt())
197 .collect();
198 for g in 0..ngenes {
199 let zstar = solve_upper_transpose(&cholv_full, &mwork.row(g).to_owned());
200 let qty = qf.t().dot(&zstar);
201 let beta = solve_upper(&rf, &qty);
202 for j in 0..nbeta {
203 coefficients[[g, j]] = beta[j];
204 stdev_unscaled[[g, j]] = stdev_row[j];
205 }
206 df_residual[g] = df;
207 if df > 0.0 {
208 let ssy: f64 = zstar.iter().map(|&v| v * v).sum();
209 let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
210 sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
211 }
212 }
213 } else {
214 for g in 0..ngenes {
216 let yfull = mwork.row(g);
217 let obs: Vec<usize> = (0..n).filter(|&k| yfull[k].is_finite()).collect();
218 let nobs = obs.len();
219 if nobs == 0 {
220 continue;
221 }
222 let mut xsub = Array2::<f64>::zeros((nobs, nbeta));
223 let mut vsub = Array2::<f64>::zeros((nobs, nobs));
224 for (ri, &ki) in obs.iter().enumerate() {
225 for j in 0..nbeta {
226 xsub[[ri, j]] = dwork[[ki, j]];
227 }
228 for (cj, &kj) in obs.iter().enumerate() {
229 vsub[[ri, cj]] = cormatrix[[ki, kj]];
230 }
231 }
232 if let Some(w) = &wwork {
233 let wrs: Vec<f64> = obs.iter().map(|&k| 1.0 / w[[g, k]].sqrt()).collect();
234 for ri in 0..nobs {
235 for cj in 0..nobs {
236 vsub[[ri, cj]] *= wrs[ri] * wrs[cj];
237 }
238 }
239 }
240 let cholv = cholesky_upper(&vsub);
241 let yobs: Array1<f64> = obs.iter().map(|&k| yfull[k]).collect();
242 let ystar = solve_upper_transpose(&cholv, &yobs);
243
244 if xsub.iter().all(|&v| v == 0.0) {
245 df_residual[g] = nobs as f64;
246 let ss: f64 = ystar.iter().map(|&v| v * v).sum();
247 sigma[g] = (ss / nobs as f64).sqrt();
248 continue;
249 }
250 if nobs < nbeta {
251 continue;
254 }
255 let xstar = backsolve_t_cols(&cholv, &xsub);
256 let (q, r) = qr_econ(&xstar);
257 let qty = q.t().dot(&ystar);
258 let beta = solve_upper(&r, &qty);
259 let xtx_inv = xtx_inv_from_r(&r);
260 for j in 0..nbeta {
261 coefficients[[g, j]] = beta[j];
262 stdev_unscaled[[g, j]] = xtx_inv[[j, j]].sqrt();
263 }
264 let df = (nobs - nbeta) as f64;
265 df_residual[g] = df;
266 if df > 0.0 {
267 let ssy: f64 = ystar.iter().map(|&v| v * v).sum();
268 let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
269 sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
270 }
271 }
272 }
273
274 let gnames: Vec<String> = if gene_names.len() == ngenes {
275 gene_names
276 } else {
277 (0..ngenes).map(|i| i.to_string()).collect()
278 };
279 let cnames: Vec<String> = if coef_names.len() == nbeta {
280 coef_names
281 } else {
282 (0..nbeta).map(|j| format!("coef{j}")).collect()
283 };
284
285 let mut fit = new_fit(
286 coefficients,
287 stdev_unscaled,
288 sigma,
289 df_residual,
290 cov_coefficients,
291 &gnames,
292 &cnames,
293 );
294 fit.amean = amean;
295 fit.design = Some(dwork);
296 Ok(fit)
297}
298
299#[cfg(test)]
300#[allow(clippy::excessive_precision)]
301mod tests {
302 use super::*;
303
304 fn rclose(a: f64, b: f64) -> bool {
305 (a - b).abs() <= 1e-7 * (1.0 + b.abs())
306 }
307
308 fn names(n: usize, p: usize) -> (Vec<String>, Vec<String>) {
309 (
310 (0..n).map(|i| format!("g{i}")).collect(),
311 (0..p).map(|j| format!("c{j}")).collect(),
312 )
313 }
314
315 fn dup_fixture() -> Array2<f64> {
317 let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
318 let mut m = Array2::<f64>::zeros((8, 6));
319 for g0 in 0..8 {
320 let base = ((g0 as i64) % 7) - 3;
321 let extra = ((g0 as i64 * 5) % 11) - 5;
322 let lvl = 5.0 + (g0 % 10) as f64 * 0.2;
323 let eff = base as f64 * 0.25;
324 for k0 in 0..6 {
325 let noise = (((g0 as i64 * 13 + k0 as i64 * 17) % 19) - 9) as f64 * 0.04;
326 m[[g0, k0]] = lvl + eff * group[k0] + extra as f64 * 0.05 + noise;
327 }
328 }
329 m
330 }
331
332 fn dup_design() -> Array2<f64> {
333 let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
334 let mut d = Array2::<f64>::zeros((6, 2));
335 for k0 in 0..6 {
336 d[[k0, 0]] = 1.0;
337 d[[k0, 1]] = group[k0];
338 }
339 d
340 }
341
342 fn block_fixture() -> Array2<f64> {
344 let mut m = Array2::<f64>::zeros((6, 8));
345 for g0 in 0..6 {
346 let lvl2 = 4.0 + (g0 % 5) as f64 * 0.3;
347 let slope = (((g0 as i64 * 2) % 7) - 3) as f64 * 0.15;
348 for k0 in 0..8 {
349 let x = (k0 % 3) as f64;
350 let noise2 = (((g0 as i64 * 11 + k0 as i64 * 5) % 17) - 8) as f64 * 0.05;
351 m[[g0, k0]] = lvl2 + slope * x + noise2;
352 }
353 }
354 m
355 }
356
357 fn block_design() -> Array2<f64> {
358 let mut d = Array2::<f64>::zeros((8, 2));
359 for k0 in 0..8 {
360 d[[k0, 0]] = 1.0;
361 d[[k0, 1]] = (k0 % 3) as f64;
362 }
363 d
364 }
365
366 #[test]
367 fn gls_series_ndups_fast_matches_r() {
368 let m = dup_fixture();
369 let (gn, cn) = names(4, 2);
370 let fit = gls_series(&m, &dup_design(), 2, 1, None, 0.4, None, gn, cn).unwrap();
371
372 let coef = [
373 [5.048333333333332, -0.73833333333333273],
374 [5.5733333333333341, 0.015000000000000385],
375 [5.9500000000000011, 0.2616666666666671],
376 [6.3266666666666627, 0.013333333333335673],
377 ];
378 let sig = [
379 0.38390381980014643,
380 0.30262541674013937,
381 0.27637319489621803,
382 0.94559882765216252,
383 ];
384 let amean = [
385 4.6791666666666671,
386 5.5808333333333335,
387 6.0808333333333335,
388 6.3333333333333339,
389 ];
390 for g in 0..4 {
391 assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
392 assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
393 assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
394 assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
395 assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.48304589153964794));
396 assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.68313005106397318));
397 assert_eq!(fit.df_residual[g], 10.0);
398 }
399 assert!(rclose(fit.cov_coefficients[[0, 0]], 0.23333333333333331));
400 assert!(rclose(fit.cov_coefficients[[0, 1]], -0.23333333333333331));
401 assert!(rclose(fit.cov_coefficients[[1, 1]], 0.46666666666666662));
402 }
403
404 #[test]
405 fn gls_series_block_fast_matches_r() {
406 let m = block_fixture();
407 let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
408 let (gn, cn) = names(6, 2);
409 let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();
410
411 let coef = [
412 [3.9896825396825411, -0.5024943310657598],
413 [4.461904761904762, -0.25646258503401381],
414 [4.340476190476191, 0.42517006802721086],
415 [5.0690476190476206, 0.25680272108843544],
416 [5.3119047619047643, -0.40646258503401367],
417 [3.6904761904761902, 0.27517006802721089],
418 ];
419 let sig = [
420 0.31011362178297863,
421 0.26676429774418003,
422 0.078484562609406242,
423 0.28938639706404889,
424 0.26676429774418015,
425 0.078484562609406339,
426 ];
427 let amean = [
428 3.5499999999999998,
429 4.2374999999999998,
430 4.7124999999999995,
431 5.2937500000000002,
432 4.9562500000000007,
433 3.9312499999999999,
434 ];
435 for g in 0..6 {
436 assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
437 assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
438 assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
439 assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
440 assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.53748384988657005));
441 assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.40629960014669614));
442 assert_eq!(fit.df_residual[g], 6.0);
443 }
444 assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
445 assert!(rclose(fit.cov_coefficients[[0, 1]], -0.14444444444444454));
446 assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
447 }
448
449 #[test]
450 fn gls_series_block_slow_one_na_matches_r() {
451 let mut m = block_fixture();
452 m[[2, 5]] = f64::NAN; let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
454 let (gn, cn) = names(6, 2);
455 let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();
456
457 assert_eq!(fit.df_residual[2], 5.0);
459 assert!(rclose(fit.coefficients[[2, 0]], 4.3380116959064319));
460 assert!(rclose(fit.coefficients[[2, 1]], 0.43538011695906442));
461 assert!(rclose(fit.sigma[2], 0.083551863073296567));
462 assert!(rclose(fit.stdev_unscaled[[2, 0]], 0.54022713197881478));
463 assert!(rclose(fit.stdev_unscaled[[2, 1]], 0.46456642400542719));
464 assert!(rclose(fit.amean[2], 4.6499999999999995));
465
466 assert_eq!(fit.df_residual[0], 6.0);
468 assert!(rclose(fit.coefficients[[0, 0]], 3.9896825396825411));
469 assert!(rclose(fit.sigma[0], 0.31011362178297869));
470 assert!(rclose(fit.coefficients[[5, 1]], 0.27517006802721089));
471 assert!(rclose(fit.sigma[5], 0.078484562609406339));
472
473 assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
475 assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
476 }
477}