use anyhow::{bail, Result};
use ndarray::Array2;
use crate::fit::{lmfit, MArrayLM};
pub fn design_i2m(design: &Array2<f64>) -> Array2<f64> {
let nrow = design.nrows();
assert_eq!(nrow % 2, 0, "design must have two rows per array");
let narrays = nrow / 2;
let p = design.ncols();
let mut out = Array2::<f64>::zeros((narrays, p));
for a in 0..narrays {
for j in 0..p {
out[[a, j]] = design[[2 * a + 1, j]] - design[[2 * a, j]];
}
}
out
}
pub fn design_i2a(design: &Array2<f64>) -> Array2<f64> {
let nrow = design.nrows();
assert_eq!(nrow % 2, 0, "design must have two rows per array");
let narrays = nrow / 2;
let p = design.ncols();
let mut out = Array2::<f64>::zeros((narrays, p));
for a in 0..narrays {
for j in 0..p {
out[[a, j]] = 0.5 * (design[[2 * a, j]] + design[[2 * a + 1, j]]);
}
}
out
}
pub fn exprs_ma(m: &Array2<f64>, a: &Array2<f64>) -> Array2<f64> {
let ngenes = m.nrows();
let narrays = m.ncols();
Array2::from_shape_fn((ngenes, 2 * narrays), |(g, k)| {
let j = k / 2;
if k % 2 == 0 {
a[[g, j]] - m[[g, j]] / 2.0
} else {
a[[g, j]] + m[[g, j]] / 2.0
}
})
}
pub fn lmsc_fit(
m: &Array2<f64>,
a: &Array2<f64>,
design: &Array2<f64>,
correlation: f64,
gene_names: Vec<String>,
coef_names: Vec<String>,
) -> Result<MArrayLM> {
let ngenes = m.nrows();
let narrays = m.ncols();
if a.dim() != (ngenes, narrays) {
bail!("dimensions of M and A don't match");
}
if m.iter().chain(a.iter()).any(|v| !v.is_finite()) {
bail!("Missing or infinite values found in M or A");
}
let ny = 2 * narrays;
if design.nrows() != ny {
bail!(
"the number of rows of the design matrix ({}) should be twice the number of arrays ({})",
design.nrows(),
narrays
);
}
if correlation.is_nan() {
bail!("intra-spot correlation is NA");
}
if correlation.abs() >= 1.0 {
bail!("correlation must be strictly between -1 and 1");
}
let sd_m = (2.0 * (1.0 - correlation)).sqrt();
let sd_a = ((1.0 + correlation) / 2.0).sqrt();
let exprs = Array2::from_shape_fn((ngenes, ny), |(g, k)| {
if k < narrays {
m[[g, k]] / sd_m
} else {
a[[g, k - narrays]] / sd_a
}
});
let design_m = design_i2m(design);
let design_a = design_i2a(design);
let p = design.ncols();
let x = Array2::from_shape_fn((ny, p), |(k, j)| {
if k < narrays {
design_m[[k, j]] / sd_m
} else {
design_a[[k - narrays, j]] / sd_a
}
});
let mut fit = lmfit(&exprs, &x, gene_names, coef_names)?;
fit.amean = (0..ngenes)
.map(|g| (0..narrays).map(|j| a[[g, j]]).sum::<f64>() / narrays as f64)
.collect();
fit.design = Some(design.clone());
Ok(fit)
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::array;
fn design() -> Array2<f64> {
array![
[1.0, 0.5],
[0.0, 2.0],
[-1.0, 1.0],
[3.0, 0.0],
[2.0, -1.0],
[0.5, 0.5],
]
}
#[test]
fn design_i2m_matches_r() {
let got = design_i2m(&design());
let want = array![[-1.0, 1.5], [4.0, -1.0], [-1.5, 1.5]];
assert_eq!(got, want);
}
#[test]
fn design_i2a_matches_r() {
let got = design_i2a(&design());
let want = array![[0.5, 1.25], [1.0, 0.5], [1.25, -0.25]];
assert_eq!(got, want);
}
#[test]
fn exprs_ma_matches_r() {
let m = array![[0.5, -1.0], [1.0, 2.0], [-0.5, 0.0]];
let a = array![[8.0, 9.0], [10.0, 11.0], [7.0, 6.0]];
let got = exprs_ma(&m, &a);
let want = array![
[7.75, 8.25, 9.5, 8.5],
[9.5, 10.5, 10.0, 12.0],
[7.25, 6.75, 6.0, 6.0],
];
assert_eq!(got, want);
}
#[allow(clippy::excessive_precision)]
fn rclose(a: f64, b: f64) -> bool {
(a - b).abs() <= 1e-9 * (1.0 + b.abs())
}
#[allow(clippy::excessive_precision)]
#[test]
fn lmsc_fit_matches_r() {
let (ngenes, narrays) = (6usize, 4usize);
let mut m = Array2::zeros((ngenes, narrays));
let mut a = Array2::zeros((ngenes, narrays));
for g0 in 0..ngenes {
for j0 in 0..narrays {
let (g, j) = (g0 as i64, j0 as i64);
m[[g0, j0]] = ((g * 3 + j * 2) % 7 - 3) as f64 * 0.4;
a[[g0, j0]] = 8.0 + (g % 4) as f64 * 0.5 + ((g + j * 3) % 5) as f64 * 0.3;
}
}
let design = array![
[1.0, 0.0, 0.0],
[1.0, 1.0, 0.0],
[1.0, 0.0, 0.0],
[1.0, 1.0, 0.0],
[1.0, 0.0, 1.0],
[1.0, 1.0, 1.0],
[1.0, 0.0, 1.0],
[1.0, 1.0, 1.0],
];
let names = vec!["Int".into(), "Dye".into(), "Trt".into()];
let fit = lmsc_fit(&m, &a, &design, 0.3, vec![], names).unwrap();
let coef = [
[
8.4499999999999993,
-2.0952455890385942e-15,
0.3000000000000001,
],
[
9.3499999999999996,
-0.20000000000000265,
-0.44999999999999968,
],
[9.1499999999999986, 0.29999999999999777, 0.3000000000000011],
[10.049999999999999, 0.099999999999997938, 0.3000000000000001],
[
8.9499999999999993,
-0.10000000000000214,
-0.44999999999999934,
],
[9.0999999999999996, -0.30000000000000238, 0.3000000000000001],
];
for (g, row) in coef.iter().enumerate() {
for (j, &c) in row.iter().enumerate() {
assert!(rclose(fit.coefficients[[g, j]], c), "coef[{g},{j}]");
}
}
let su = [0.64226162893325656, 0.5916079783099617, 0.80622577482985502];
for g in 0..ngenes {
for (j, &s) in su.iter().enumerate() {
assert!(rclose(fit.stdev_unscaled[[g, j]], s), "stdev[{g},{j}]");
}
}
let sigma = [
0.84046036573632044,
0.6907552802135184,
0.66926234610359303,
0.66926234610359581,
0.71912645420875421,
0.76575036818380149,
];
for (g, &s) in sigma.iter().enumerate() {
assert!(rclose(fit.sigma[g], s), "sigma[{g}]");
assert!((fit.df_residual[g] - 5.0).abs() < 1e-12, "df[{g}]");
}
let cov = [
[
0.41250000000000009,
-0.17500000000000016,
-0.32500000000000001,
],
[
-0.17500000000000016,
0.35000000000000009,
8.5386183879315777e-17,
],
[
-0.32500000000000001,
8.5386183879315777e-17,
0.65000000000000013,
],
];
for (i, row) in cov.iter().enumerate() {
for (j, &c) in row.iter().enumerate() {
assert!(rclose(fit.cov_coefficients[[i, j]], c), "cov[{i},{j}]");
}
}
let amean = [
8.5999999999999996,
9.0250000000000004,
9.4499999999999993,
10.25,
8.6750000000000007,
9.0999999999999996,
];
for (g, &am) in amean.iter().enumerate() {
assert!(rclose(fit.amean[g], am), "amean[{g}]");
}
assert_eq!(fit.design.as_ref().unwrap(), &design);
}
}