#![cfg(feature = "cli")]
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use limma::{ebayes, lmfit, top_table, SortBy};
use ndarray::Array2;
struct Fixture {
dir: PathBuf,
exprs: PathBuf,
design: PathBuf,
data: Array2<f64>,
design_mat: Array2<f64>,
genes: Vec<String>,
coefs: Vec<String>,
}
impl Drop for Fixture {
fn drop(&mut self) {
let _ = fs::remove_dir_all(&self.dir);
}
}
fn unique_dir(tag: &str) -> PathBuf {
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let mut d = std::env::temp_dir();
d.push(format!("limma_it_{}_{}_{}", tag, std::process::id(), nanos));
fs::create_dir_all(&d).unwrap();
d
}
fn write_idmat(
path: &PathBuf,
id_header: &str,
ids: &[String],
cols: &[String],
m: &Array2<f64>,
sep: char,
) {
let mut s = String::from(id_header);
for c in cols {
s.push(sep);
s.push_str(c);
}
s.push('\n');
for (i, id) in ids.iter().enumerate() {
s.push_str(id);
for j in 0..cols.len() {
s.push(sep);
s.push_str(&format!("{}", m[[i, j]]));
}
s.push('\n');
}
fs::write(path, s).unwrap();
}
fn setup(tag: &str) -> Fixture {
setup_fmt(tag, "csv", ',')
}
fn setup_fmt(tag: &str, ext: &str, sep: char) -> Fixture {
let genes: Vec<String> = (1..=6).map(|i| format!("g{i}")).collect();
let samples: Vec<String> = (1..=6).map(|i| format!("s{i}")).collect();
let coefs: Vec<String> = vec!["Intercept".into(), "grpB".into()];
#[rustfmt::skip]
let data = Array2::from_shape_vec((6, 6), vec![
5.1, 4.9, 5.0, 7.0, 7.2, 6.9,
3.0, 3.1, 2.9, 3.0, 2.8, 3.1,
8.0, 8.2, 7.9, 6.0, 5.9, 6.1,
1.0, 1.2, 0.9, 1.1, 1.0, 1.3,
4.0, 4.1, 3.9, 4.2, 4.0, 4.1,
2.0, 2.1, 1.9, 5.0, 5.1, 4.9,
]).unwrap();
#[rustfmt::skip]
let design_mat = Array2::from_shape_vec((6, 2), vec![
1.0, 0.0,
1.0, 0.0,
1.0, 0.0,
1.0, 1.0,
1.0, 1.0,
1.0, 1.0,
]).unwrap();
let dir = unique_dir(tag);
let exprs = dir.join(format!("exprs.{ext}"));
let design = dir.join(format!("design.{ext}"));
write_idmat(&exprs, "gene", &genes, &samples, &data, sep);
write_idmat(&design, "sample", &samples, &coefs, &design_mat, sep);
Fixture {
dir,
exprs,
design,
data,
design_mat,
genes,
coefs,
}
}
fn run_cli(args: &[&str]) -> std::process::Output {
Command::new(env!("CARGO_BIN_EXE_limma"))
.args(args)
.output()
.expect("failed to spawn limma binary")
}
#[test]
fn cli_matches_library_end_to_end() {
let fx = setup("e2e");
let out = fx.dir.join("out.csv");
let dump = fx.dir.join("dump.csv");
let res = run_cli(&[
"--exprs",
fx.exprs.to_str().unwrap(),
"--design",
fx.design.to_str().unwrap(),
"--coef",
"grpB",
"--out",
out.to_str().unwrap(),
"--dump",
dump.to_str().unwrap(),
]);
assert!(
res.status.success(),
"CLI exited with failure: {}",
String::from_utf8_lossy(&res.stderr)
);
let out_txt = fs::read_to_string(&out).unwrap();
let mut lines = out_txt.lines();
assert_eq!(
lines.next().unwrap(),
"id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B"
);
let cli_rows: Vec<(String, f64, f64, f64)> = lines
.map(|l| {
let f: Vec<&str> = l.split(',').collect();
(
f[0].to_string(),
f[1].parse().unwrap(), f[4].parse().unwrap(), f[7].parse().unwrap(), )
})
.collect();
assert_eq!(cli_rows.len(), 6, "expected one row per gene");
let mut fit = lmfit(&fx.data, &fx.design_mat, fx.genes.clone(), fx.coefs.clone()).unwrap();
ebayes(&mut fit, 0.01, (0.1, 4.0), false, false).unwrap();
let coef = fx.coefs.iter().position(|c| c == "grpB").unwrap();
let lib_rows = top_table(&fit, coef, SortBy::B, None).unwrap();
assert_eq!(lib_rows.len(), 6);
let close = |a: f64, b: f64| (a - b).abs() <= 1e-6 * (1.0 + b.abs());
for (cli, lib) in cli_rows.iter().zip(lib_rows.iter()) {
assert_eq!(
cli.0, lib.id,
"gene ordering must match the library's B sort"
);
assert!(
close(cli.1, lib.log2_fold_change),
"logFC mismatch for {}",
cli.0
);
assert!(close(cli.2, lib.t), "t mismatch for {}", cli.0);
assert!(close(cli.3, lib.b), "B mismatch for {}", cli.0);
}
assert_eq!(
cli_rows[0].0, "g6",
"g6 has the largest effect, sorts first by B"
);
assert!(
cli_rows.iter().any(|r| r.0 == "g3" && r.1 < 0.0),
"g3 is down in B"
);
let dump_txt = fs::read_to_string(&dump).unwrap();
let mut dl = dump_txt.lines();
assert_eq!(
dl.next().unwrap(),
"id,coef,coefficient,stdev_unscaled,t,p_value,lods,sigma,s2_post,df_total,F,F_p_value"
);
assert_eq!(dl.count(), 12, "6 genes x 2 coefficients");
}
#[test]
fn cli_rejects_unknown_coefficient() {
let fx = setup("badcoef");
let res = run_cli(&[
"--exprs",
fx.exprs.to_str().unwrap(),
"--design",
fx.design.to_str().unwrap(),
"--coef",
"does_not_exist",
]);
assert!(
!res.status.success(),
"CLI should fail on an unknown coefficient name"
);
}
#[test]
fn cli_reads_tsv_inputs() {
let fx = setup_fmt("tsv", "tsv", '\t');
let out = fx.dir.join("out.csv");
let res = run_cli(&[
"--exprs",
fx.exprs.to_str().unwrap(),
"--design",
fx.design.to_str().unwrap(),
"--coef",
"grpB",
"--out",
out.to_str().unwrap(),
]);
assert!(
res.status.success(),
"CLI should read .tsv inputs: {}",
String::from_utf8_lossy(&res.stderr)
);
let out_txt = fs::read_to_string(&out).unwrap();
let first_data = out_txt.lines().nth(1).unwrap();
let top_gene = first_data.split(',').next().unwrap();
assert_eq!(
top_gene, "g6",
"TSV must parse to the same matrix: g6 still sorts first by B"
);
}
#[test]
fn cli_delimiter_override() {
let fx = setup_fmt("semi", "csv", ';');
let out = fx.dir.join("out.csv");
let ok = run_cli(&[
"--exprs",
fx.exprs.to_str().unwrap(),
"--design",
fx.design.to_str().unwrap(),
"--coef",
"grpB",
"--delimiter",
";",
"--out",
out.to_str().unwrap(),
]);
assert!(
ok.status.success(),
"CLI should honour --delimiter ';': {}",
String::from_utf8_lossy(&ok.stderr)
);
let out_txt = fs::read_to_string(&out).unwrap();
let top_gene = out_txt.lines().nth(1).unwrap().split(',').next().unwrap();
assert_eq!(top_gene, "g6");
let bad = run_cli(&[
"--exprs",
fx.exprs.to_str().unwrap(),
"--design",
fx.design.to_str().unwrap(),
"--coef",
"grpB",
]);
assert!(
!bad.status.success(),
"a semicolon file read as comma should fail without --delimiter"
);
}