use anyhow::{bail, Result};
use ndarray::{Array1, Array2};
use crate::dups::{kron_rows, unwrapdups};
use crate::fit::{new_fit, MArrayLM};
use crate::linalg::{cholesky_upper, qr_econ, solve_upper, solve_upper_transpose, xtx_inv_from_r};
fn nanmean<'a, I: IntoIterator<Item = &'a f64>>(it: I) -> f64 {
let mut sum = 0.0;
let mut cnt = 0usize;
for &v in it {
if v.is_finite() {
sum += v;
cnt += 1;
}
}
if cnt > 0 {
sum / cnt as f64
} else {
f64::NAN
}
}
fn gls_amean(exprs: &Array2<f64>, ndups: usize, spacing: usize) -> Array1<f64> {
let nspots = exprs.nrows();
let spotmean: Vec<f64> = (0..nspots).map(|i| nanmean(exprs.row(i))).collect();
if ndups <= 1 {
return Array1::from(spotmean);
}
let col = Array2::from_shape_vec((nspots, 1), spotmean).expect("column shape");
let uw = unwrapdups(&col, ndups, spacing.max(1));
(0..uw.nrows()).map(|g| nanmean(uw.row(g))).collect()
}
fn backsolve_t_cols(u: &Array2<f64>, b: &Array2<f64>) -> Array2<f64> {
let (n, k) = b.dim();
let mut out = Array2::<f64>::zeros((n, k));
for j in 0..k {
let sol = solve_upper_transpose(u, &b.column(j).to_owned());
out.column_mut(j).assign(&sol);
}
out
}
#[allow(clippy::too_many_arguments)]
pub fn gls_series(
exprs: &Array2<f64>,
design: &Array2<f64>,
ndups: usize,
spacing: usize,
block: Option<&[i64]>,
correlation: f64,
weights: Option<&Array2<f64>>,
gene_names: Vec<String>,
coef_names: Vec<String>,
) -> Result<MArrayLM> {
let narrays0 = exprs.ncols();
if design.nrows() != narrays0 {
bail!(
"design rows ({}) must match number of arrays ({})",
design.nrows(),
narrays0
);
}
if correlation.abs() >= 1.0 {
bail!("correlation is 1 or -1, so the model is degenerate");
}
let nbeta = design.ncols();
let amean = gls_amean(exprs, ndups, spacing);
let mut m = exprs.clone();
let mut wmat: Option<Array2<f64>> = None;
if let Some(w) = weights {
if w.dim() != exprs.dim() {
bail!("weights dimensions must match exprs");
}
let mut wc = w.clone();
for i in 0..wc.nrows() {
for j in 0..wc.ncols() {
if wc[[i, j]].is_nan() {
wc[[i, j]] = 0.0;
}
if wc[[i, j]] < 1e-15 {
m[[i, j]] = f64::NAN;
wc[[i, j]] = f64::NAN;
}
}
}
wmat = Some(wc);
}
let (cormatrix, mwork, wwork, dwork) = match block {
None => {
let nd = if ndups < 2 { 1 } else { ndups };
let corr = if ndups < 2 { 0.0 } else { correlation };
let sp = spacing.max(1);
let mu = unwrapdups(&m, nd, sp);
let wu = wmat.as_ref().map(|w| unwrapdups(w, nd, sp));
let du = kron_rows(design, nd);
let n = du.nrows();
let mut cor = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in 0..n {
if i / nd == j / nd {
cor[[i, j]] = corr;
}
}
cor[[i, i]] = 1.0;
}
(cor, mu, wu, du)
}
Some(b) => {
if b.len() != narrays0 {
bail!("length of block does not match number of arrays");
}
let n = narrays0;
let mut cor = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in 0..n {
if b[i] == b[j] {
cor[[i, j]] = correlation;
}
}
cor[[i, i]] = 1.0;
}
(cor, m, wmat, design.clone())
}
};
let ngenes = mwork.nrows();
let n = mwork.ncols();
let all_finite = mwork.iter().all(|v| v.is_finite());
let no_probe_wts = all_finite && wwork.is_none();
let mut coefficients = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
let mut stdev_unscaled = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
let mut sigma = Array1::<f64>::from_elem(ngenes, f64::NAN);
let mut df_residual = Array1::<f64>::zeros(ngenes);
let cholv_full = cholesky_upper(&cormatrix);
let xstar_full = backsolve_t_cols(&cholv_full, &dwork);
let (qf, rf) = qr_econ(&xstar_full);
let cov_coefficients = xtx_inv_from_r(&rf);
if no_probe_wts {
let df = (n - nbeta) as f64;
let stdev_row: Vec<f64> = (0..nbeta)
.map(|j| cov_coefficients[[j, j]].sqrt())
.collect();
for g in 0..ngenes {
let zstar = solve_upper_transpose(&cholv_full, &mwork.row(g).to_owned());
let qty = qf.t().dot(&zstar);
let beta = solve_upper(&rf, &qty);
for j in 0..nbeta {
coefficients[[g, j]] = beta[j];
stdev_unscaled[[g, j]] = stdev_row[j];
}
df_residual[g] = df;
if df > 0.0 {
let ssy: f64 = zstar.iter().map(|&v| v * v).sum();
let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
}
}
} else {
for g in 0..ngenes {
let yfull = mwork.row(g);
let obs: Vec<usize> = (0..n).filter(|&k| yfull[k].is_finite()).collect();
let nobs = obs.len();
if nobs == 0 {
continue;
}
let mut xsub = Array2::<f64>::zeros((nobs, nbeta));
let mut vsub = Array2::<f64>::zeros((nobs, nobs));
for (ri, &ki) in obs.iter().enumerate() {
for j in 0..nbeta {
xsub[[ri, j]] = dwork[[ki, j]];
}
for (cj, &kj) in obs.iter().enumerate() {
vsub[[ri, cj]] = cormatrix[[ki, kj]];
}
}
if let Some(w) = &wwork {
let wrs: Vec<f64> = obs.iter().map(|&k| 1.0 / w[[g, k]].sqrt()).collect();
for ri in 0..nobs {
for cj in 0..nobs {
vsub[[ri, cj]] *= wrs[ri] * wrs[cj];
}
}
}
let cholv = cholesky_upper(&vsub);
let yobs: Array1<f64> = obs.iter().map(|&k| yfull[k]).collect();
let ystar = solve_upper_transpose(&cholv, &yobs);
if xsub.iter().all(|&v| v == 0.0) {
df_residual[g] = nobs as f64;
let ss: f64 = ystar.iter().map(|&v| v * v).sum();
sigma[g] = (ss / nobs as f64).sqrt();
continue;
}
if nobs < nbeta {
continue;
}
let xstar = backsolve_t_cols(&cholv, &xsub);
let (q, r) = qr_econ(&xstar);
let qty = q.t().dot(&ystar);
let beta = solve_upper(&r, &qty);
let xtx_inv = xtx_inv_from_r(&r);
for j in 0..nbeta {
coefficients[[g, j]] = beta[j];
stdev_unscaled[[g, j]] = xtx_inv[[j, j]].sqrt();
}
let df = (nobs - nbeta) as f64;
df_residual[g] = df;
if df > 0.0 {
let ssy: f64 = ystar.iter().map(|&v| v * v).sum();
let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
}
}
}
let gnames: Vec<String> = if gene_names.len() == ngenes {
gene_names
} else {
(0..ngenes).map(|i| i.to_string()).collect()
};
let cnames: Vec<String> = if coef_names.len() == nbeta {
coef_names
} else {
(0..nbeta).map(|j| format!("coef{j}")).collect()
};
let mut fit = new_fit(
coefficients,
stdev_unscaled,
sigma,
df_residual,
cov_coefficients,
&gnames,
&cnames,
);
fit.amean = amean;
fit.design = Some(dwork);
Ok(fit)
}
#[cfg(test)]
#[allow(clippy::excessive_precision)]
mod tests {
use super::*;
fn rclose(a: f64, b: f64) -> bool {
(a - b).abs() <= 1e-7 * (1.0 + b.abs())
}
fn names(n: usize, p: usize) -> (Vec<String>, Vec<String>) {
(
(0..n).map(|i| format!("g{i}")).collect(),
(0..p).map(|j| format!("c{j}")).collect(),
)
}
fn dup_fixture() -> Array2<f64> {
let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let mut m = Array2::<f64>::zeros((8, 6));
for g0 in 0..8 {
let base = ((g0 as i64) % 7) - 3;
let extra = ((g0 as i64 * 5) % 11) - 5;
let lvl = 5.0 + (g0 % 10) as f64 * 0.2;
let eff = base as f64 * 0.25;
for k0 in 0..6 {
let noise = (((g0 as i64 * 13 + k0 as i64 * 17) % 19) - 9) as f64 * 0.04;
m[[g0, k0]] = lvl + eff * group[k0] + extra as f64 * 0.05 + noise;
}
}
m
}
fn dup_design() -> Array2<f64> {
let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let mut d = Array2::<f64>::zeros((6, 2));
for k0 in 0..6 {
d[[k0, 0]] = 1.0;
d[[k0, 1]] = group[k0];
}
d
}
fn block_fixture() -> Array2<f64> {
let mut m = Array2::<f64>::zeros((6, 8));
for g0 in 0..6 {
let lvl2 = 4.0 + (g0 % 5) as f64 * 0.3;
let slope = (((g0 as i64 * 2) % 7) - 3) as f64 * 0.15;
for k0 in 0..8 {
let x = (k0 % 3) as f64;
let noise2 = (((g0 as i64 * 11 + k0 as i64 * 5) % 17) - 8) as f64 * 0.05;
m[[g0, k0]] = lvl2 + slope * x + noise2;
}
}
m
}
fn block_design() -> Array2<f64> {
let mut d = Array2::<f64>::zeros((8, 2));
for k0 in 0..8 {
d[[k0, 0]] = 1.0;
d[[k0, 1]] = (k0 % 3) as f64;
}
d
}
#[test]
fn gls_series_ndups_fast_matches_r() {
let m = dup_fixture();
let (gn, cn) = names(4, 2);
let fit = gls_series(&m, &dup_design(), 2, 1, None, 0.4, None, gn, cn).unwrap();
let coef = [
[5.048333333333332, -0.73833333333333273],
[5.5733333333333341, 0.015000000000000385],
[5.9500000000000011, 0.2616666666666671],
[6.3266666666666627, 0.013333333333335673],
];
let sig = [
0.38390381980014643,
0.30262541674013937,
0.27637319489621803,
0.94559882765216252,
];
let amean = [
4.6791666666666671,
5.5808333333333335,
6.0808333333333335,
6.3333333333333339,
];
for g in 0..4 {
assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.48304589153964794));
assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.68313005106397318));
assert_eq!(fit.df_residual[g], 10.0);
}
assert!(rclose(fit.cov_coefficients[[0, 0]], 0.23333333333333331));
assert!(rclose(fit.cov_coefficients[[0, 1]], -0.23333333333333331));
assert!(rclose(fit.cov_coefficients[[1, 1]], 0.46666666666666662));
}
#[test]
fn gls_series_block_fast_matches_r() {
let m = block_fixture();
let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
let (gn, cn) = names(6, 2);
let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();
let coef = [
[3.9896825396825411, -0.5024943310657598],
[4.461904761904762, -0.25646258503401381],
[4.340476190476191, 0.42517006802721086],
[5.0690476190476206, 0.25680272108843544],
[5.3119047619047643, -0.40646258503401367],
[3.6904761904761902, 0.27517006802721089],
];
let sig = [
0.31011362178297863,
0.26676429774418003,
0.078484562609406242,
0.28938639706404889,
0.26676429774418015,
0.078484562609406339,
];
let amean = [
3.5499999999999998,
4.2374999999999998,
4.7124999999999995,
5.2937500000000002,
4.9562500000000007,
3.9312499999999999,
];
for g in 0..6 {
assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.53748384988657005));
assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.40629960014669614));
assert_eq!(fit.df_residual[g], 6.0);
}
assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
assert!(rclose(fit.cov_coefficients[[0, 1]], -0.14444444444444454));
assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
}
#[test]
fn gls_series_block_slow_one_na_matches_r() {
let mut m = block_fixture();
m[[2, 5]] = f64::NAN; let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
let (gn, cn) = names(6, 2);
let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();
assert_eq!(fit.df_residual[2], 5.0);
assert!(rclose(fit.coefficients[[2, 0]], 4.3380116959064319));
assert!(rclose(fit.coefficients[[2, 1]], 0.43538011695906442));
assert!(rclose(fit.sigma[2], 0.083551863073296567));
assert!(rclose(fit.stdev_unscaled[[2, 0]], 0.54022713197881478));
assert!(rclose(fit.stdev_unscaled[[2, 1]], 0.46456642400542719));
assert!(rclose(fit.amean[2], 4.6499999999999995));
assert_eq!(fit.df_residual[0], 6.0);
assert!(rclose(fit.coefficients[[0, 0]], 3.9896825396825411));
assert!(rclose(fit.sigma[0], 0.31011362178297869));
assert!(rclose(fit.coefficients[[5, 1]], 0.27517006802721089));
assert!(rclose(fit.sigma[5], 0.078484562609406339));
assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
}
}