use anyhow::{bail, Result};
use ndarray::{Array2, ArrayView1, Axis};
use crate::norm::{normalize_between_arrays, NormalizeMethod};
use crate::normexp::normexp_signal;
fn which_max(v: ArrayView1<f64>) -> usize {
let mut bi = 0usize;
let mut bv = v[0];
for i in 1..v.len() {
if v[i] > bv {
bv = v[i];
bi = i;
}
}
bi
}
fn which_min(v: ArrayView1<f64>) -> usize {
let mut bi = 0usize;
let mut bv = v[0];
for i in 1..v.len() {
if v[i] < bv {
bv = v[i];
bi = i;
}
}
bi
}
fn median(v: &[f64]) -> f64 {
let mut s: Vec<f64> = v.to_vec();
s.sort_by(|a, b| a.partial_cmp(b).expect("finite"));
let n = s.len();
if n % 2 == 1 {
s[n / 2]
} else {
(s[n / 2 - 1] + s[n / 2]) * 0.5
}
}
fn mad(v: &[f64]) -> f64 {
let center = median(v);
let dev: Vec<f64> = v.iter().map(|&x| (x - center).abs()).collect();
1.4826 * median(&dev)
}
fn huber(y: &[f64], k: f64, tol: f64) -> Result<(f64, f64)> {
let n = y.len() as f64;
let s = mad(y);
if s == 0.0 {
bail!("cannot estimate scale: MAD is zero for negative controls");
}
let mut mu = median(y);
loop {
let lo = mu - k * s;
let hi = mu + k * s;
let mu1 = y.iter().map(|&v| v.clamp(lo, hi)).sum::<f64>() / n;
if (mu - mu1).abs() < tol * s {
break;
}
mu = mu1;
}
Ok((mu, s))
}
pub fn normexp_fit_control(
x: &Array2<f64>,
status: &[String],
negctrl: &str,
regular: &str,
robust: bool,
) -> Result<Array2<f64>> {
let (nprobe, narray) = x.dim();
if status.len() != nprobe {
bail!(
"status length ({}) must match number of probes ({})",
status.len(),
nprobe
);
}
let reg: Vec<usize> = (0..nprobe)
.filter(|&g| status[g].eq_ignore_ascii_case(regular))
.collect();
let neg: Vec<usize> = (0..nprobe)
.filter(|&g| status[g].eq_ignore_ascii_case(negctrl))
.collect();
if reg.is_empty() {
bail!("No regular probes found");
}
if neg.len() < 2 {
bail!("Fewer than two negative control probes found");
}
let nneg = neg.len() as f64;
let nreg = reg.len() as f64;
let mut par = Array2::<f64>::zeros((narray, 3));
for j in 0..narray {
let (mu, sigma) = if robust {
let logneg: Vec<f64> = neg.iter().map(|&g| x[[g, j]].ln()).collect();
let (m, s) = huber(&logneg, 1.5, 1e-6)?;
let mu = (m + s * s / 2.0).exp();
let omega = (s * s).exp();
let sigma = (omega * (omega - 1.0)).sqrt() * m.exp();
(mu, sigma)
} else {
let mu = neg.iter().map(|&g| x[[g, j]]).sum::<f64>() / nneg;
let ss = neg
.iter()
.map(|&g| {
let d = x[[g, j]] - mu;
d * d
})
.sum::<f64>();
(mu, (ss / (nneg - 1.0)).sqrt())
};
let mean_reg = reg.iter().map(|&g| x[[g, j]]).sum::<f64>() / nreg;
let alpha = (mean_reg - mu).max(10.0);
par[[j, 0]] = mu;
par[[j, 1]] = sigma.ln();
par[[j, 2]] = alpha.ln();
}
Ok(par)
}
pub fn normexp_fit_detection_p(x: &Array2<f64>, detection_p: &Array2<f64>) -> Result<Array2<f64>> {
let (nprobe, narray) = x.dim();
if detection_p.dim() != x.dim() {
bail!("detection p-value matrix must match intensity dimensions");
}
let mut dp = detection_p.clone();
let y0 = x.column(0);
let p0 = detection_p.column(0);
if p0[which_max(y0)] < p0[which_min(y0)] {
dp.mapv_inplace(|v| 1.0 - v);
}
let mut par = Array2::<f64>::zeros((narray, 3));
for j in 0..narray {
let mut ord: Vec<usize> = (0..nprobe).collect();
ord.sort_by(|&a, &b| {
dp[[a, j]]
.partial_cmp(&dp[[b, j]])
.expect("finite p")
.then(x[[a, j]].partial_cmp(&x[[b, j]]).expect("finite y"))
});
let ys: Vec<f64> = ord.iter().map(|&i| x[[i, j]]).collect();
let ps: Vec<f64> = ord.iter().map(|&i| dp[[i, j]]).collect();
let jidx: Vec<usize> = (1..nprobe).filter(|&k| ps[k] != ps[k - 1]).collect();
let ync: Vec<f64> = jidx.iter().map(|&k| (ys[k] + ys[k - 1]) * 0.5).collect();
let d: Vec<f64> = jidx.iter().map(|&k| ps[k] - ps[k - 1]).collect();
let dmin = d.iter().copied().fold(f64::INFINITY, f64::min);
let freq: Vec<f64> = d.iter().map(|&v| v / dmin).collect();
let sumw: f64 = freq.iter().sum();
let nn: f64 = sumw;
let mu = ync
.iter()
.zip(freq.iter())
.map(|(&yv, &w)| yv * w)
.sum::<f64>()
/ sumw;
let var_w = ync
.iter()
.zip(freq.iter())
.map(|(&yv, &w)| {
let dd = yv - mu;
dd * dd * w
})
.sum::<f64>()
/ sumw;
let sigma = (var_w * nn / (nn - 1.0)).sqrt();
let mean_x = (0..nprobe).map(|g| x[[g, j]]).sum::<f64>() / nprobe as f64;
let alpha = (mean_x - mu).max(10.0);
par[[j, 0]] = mu;
par[[j, 1]] = sigma.ln();
par[[j, 2]] = alpha.ln();
}
Ok(par)
}
#[allow(clippy::too_many_arguments)]
pub fn nec(
x: &Array2<f64>,
status: Option<&[String]>,
negctrl: &str,
regular: &str,
offset: f64,
robust: bool,
detection_p: Option<&Array2<f64>>,
) -> Result<Array2<f64>> {
let has_neg = match status {
Some(s) => s.iter().any(|t| t.eq_ignore_ascii_case(negctrl)),
None => false,
};
let par = if has_neg {
normexp_fit_control(x, status.expect("status present"), negctrl, regular, robust)?
} else {
match detection_p {
Some(dp) => normexp_fit_detection_p(x, dp)?,
None => bail!("no negative control probes found and no detection p-values supplied"),
}
};
let (nprobe, narray) = x.dim();
let mut out = Array2::<f64>::zeros((nprobe, narray));
for j in 0..narray {
let col: Vec<f64> = (0..nprobe).map(|g| x[[g, j]]).collect();
let p = [par[[j, 0]], par[[j, 1]], par[[j, 2]]];
let sig = normexp_signal(&p, &col);
for g in 0..nprobe {
out[[g, j]] = sig[g] + offset;
}
}
Ok(out)
}
#[allow(clippy::too_many_arguments)]
pub fn neqc(
x: &Array2<f64>,
status: Option<&[String]>,
negctrl: &str,
regular: &str,
offset: f64,
robust: bool,
detection_p: Option<&Array2<f64>>,
) -> Result<Array2<f64>> {
let xbg = nec(x, status, negctrl, regular, offset, robust, detection_p)?;
let qn = normalize_between_arrays(&xbg, NormalizeMethod::Quantile);
let y = qn.mapv(|v| v.log2());
match status {
Some(s) => {
let reg: Vec<usize> = (0..x.nrows())
.filter(|&g| s[g].eq_ignore_ascii_case(regular))
.collect();
Ok(y.select(Axis(0), ®))
}
None => Ok(y),
}
}
#[cfg(test)]
#[allow(clippy::excessive_precision, clippy::approx_constant)]
mod tests {
use super::*;
use ndarray::Array2;
fn rclose(a: f64, b: f64) -> bool {
(a - b).abs() <= 1e-8 * (1.0 + b.abs())
}
fn ctrl_fixture() -> (Array2<f64>, Vec<String>) {
let mut x = Array2::<f64>::zeros((12, 4));
for g0 in 0..12i64 {
for j0 in 0..4i64 {
let v = if g0 < 4 {
55 + g0 * 6 + j0 * 5 + ((g0 * 3 + j0 * 2) % 7)
} else {
let r = g0 - 4;
180 + r * 55 + j0 * 12 + ((r * 5 + j0 * 3) % 9) * 7
};
x[[g0 as usize, j0 as usize]] = v as f64;
}
}
let mut status: Vec<String> = vec!["negative".to_string(); 4];
status.extend(vec!["regular".to_string(); 8]);
(x, status)
}
#[test]
fn normexp_fit_control_matches_r() {
let (x, status) = ctrl_fixture();
let par = normexp_fit_control(&x, &status, "negative", "regular", false).unwrap();
let expect = [
[66.75, 2.2168942846162354, 5.8103922097144594],
[72.0, 2.1048277043665475, 5.8226760045966115],
[77.25, 2.2168942846162354, 5.8575759478356177],
[82.5, 1.8648507243170958, 5.8692969131337742],
];
for j in 0..4 {
for k in 0..3 {
assert!(rclose(par[[j, k]], expect[j][k]), "par[{j},{k}]");
}
}
}
#[test]
fn nec_control_matches_r() {
let (x, status) = ctrl_fixture();
let out = nec(&x, Some(&status), "negative", "regular", 16.0, false, None).unwrap();
assert!(rclose(out[[0, 0]], 20.305342016641642));
assert!(rclose(out[[0, 3]], 19.357572919956016));
assert!(rclose(out[[4, 0]], 128.99756554307118));
assert!(rclose(out[[4, 2]], 184.50919971418364));
assert!(rclose(out[[11, 0]], 569.99756554307112));
assert!(rclose(out[[11, 3]], 590.38229755178907));
assert!(rclose(out[[7, 1]], 300.8007152546553));
}
#[test]
fn neqc_control_matches_r() {
let (x, status) = ctrl_fixture();
let y = neqc(&x, Some(&status), "negative", "regular", 16.0, false, None).unwrap();
assert_eq!(y.dim(), (8, 4));
let col0 = [
7.2754023606686635,
7.8402897759719394,
8.087051426298979,
8.3681678774159849,
8.6033386841443473,
8.8054938193940089,
8.937145573861951,
9.1406315482768701,
];
for g in 0..8 {
for j in 0..4 {
assert!(rclose(y[[g, j]], col0[g]), "neqc[{g},{j}]");
}
}
}
#[test]
fn huber_matches_mass() {
let (x, _status) = ctrl_fixture();
let logneg: Vec<f64> = (0..4).map(|g| x[[g, 0]].ln()).collect();
let (mu, s) = huber(&logneg, 1.5, 1e-6).unwrap();
assert!(rclose(mu, 4.1968233644781909), "huber mu: {mu}");
assert!(rclose(s, 0.11757390886994287), "huber s: {s}");
}
#[test]
fn normexp_fit_control_robust_matches_r() {
let (x, status) = ctrl_fixture();
let par = normexp_fit_control(&x, &status, "negative", "regular", true).unwrap();
let expect = [
[66.935882025964389, 2.0665069321187759, 5.8098351046697276],
[72.056377672121371, 2.0432833532410433, 5.8225091311190793],
[77.313938618332429, 2.1404505355358392, 5.8573931841018823],
[82.704902206490445, 2.0928195889897081, 5.868717925760583],
];
for j in 0..4 {
for k in 0..3 {
assert!(rclose(par[[j, k]], expect[j][k]), "robust par[{j},{k}]");
}
}
}
#[test]
fn nec_neqc_robust_matches_r() {
let (x, status) = ctrl_fixture();
let out = nec(&x, Some(&status), "negative", "regular", 16.0, true, None).unwrap();
assert!(rclose(out[[0, 0]], 19.423283483241832));
assert!(rclose(out[[0, 3]], 20.511023200579992));
assert!(rclose(out[[4, 0]], 128.87715061732911));
assert!(rclose(out[[4, 1]], 156.76738829317546));
assert!(rclose(out[[11, 0]], 569.87715061732911));
assert!(rclose(out[[11, 3]], 590.10929673355065));
let y = neqc(&x, Some(&status), "negative", "regular", 16.0, true, None).unwrap();
assert_eq!(y.dim(), (8, 4));
let col = [
7.2743390086104114,
7.8395710271311723,
8.0864456985233844,
8.3676694099002535,
8.6029152049207713,
8.8051257166846142,
8.9368095786763639,
9.140339757863492,
];
for g in 0..8 {
for j in 0..4 {
assert!(rclose(y[[g, j]], col[g]), "robust neqc[{g},{j}]");
}
}
}
fn detp_fixture() -> (Array2<f64>, Array2<f64>) {
let mut x = Array2::<f64>::zeros((8, 3));
let mut dp = Array2::<f64>::zeros((8, 3));
for g0 in 0..8i64 {
for j0 in 0..3i64 {
x[[g0 as usize, j0 as usize]] = (120 + g0 * 37 + j0 * 19) as f64;
dp[[g0 as usize, j0 as usize]] =
(8 - g0) as f64 / 9.0 - j0 as f64 * 0.013 - g0 as f64 * 0.0007;
}
}
(x, dp)
}
#[test]
fn normexp_fit_detection_p_matches_r() {
let (x, dp) = detp_fixture();
let par = normexp_fit_detection_p(&x, &dp).unwrap();
let expect = [
[249.5, 4.3811404331177988, 2.3025850929940459],
[268.5, 4.3811404331177988, 2.3025850929940459],
[287.5, 4.3811404331177988, 2.3025850929940459],
];
for j in 0..3 {
for k in 0..3 {
assert!(rclose(par[[j, k]], expect[j][k]), "par[{j},{k}]");
}
}
}
#[test]
fn nec_detection_p_matches_r() {
let (x, dp) = detp_fixture();
let status = vec!["regular".to_string(); 8];
let out = nec(
&x,
Some(&status),
"negative",
"regular",
16.0,
false,
Some(&dp),
)
.unwrap();
let col = [
24.143687836126446,
24.538051994415582,
24.971626716787227,
25.450412234664782,
25.981663577672862,
26.574226144777185,
27.238981111286421,
27.989443237658861,
];
for g in 0..8 {
for j in 0..3 {
assert!(rclose(out[[g, j]], col[g]), "nec_dp[{g},{j}]");
}
}
}
}