use std::path::PathBuf;
use std::time::Instant;
use anyhow::{bail, Context, Result};
use clap::Parser;
use limma::io::{
align_contrasts, align_design, read_matrix, read_matrix_with_delimiter, write_fit,
write_fit_dump, write_test_results, write_top_table, write_top_table_f, WriteFitOptions,
};
use limma::{
contrasts_fit, decide_tests, ebayes, lmfit, top_table, top_table_f, top_treat, treat, Adjust,
DecideMethod, SortBy,
};
#[derive(Parser, Debug)]
#[command(
name = "limma",
about = "Pure-Rust port of Bioconductor limma (lmFit -> eBayes -> topTable)",
version
)]
struct Args {
#[arg(long)]
exprs: PathBuf,
#[arg(long)]
design: PathBuf,
#[arg(long)]
contrasts: Option<PathBuf>,
#[arg(long)]
delimiter: Option<String>,
#[arg(long)]
coef: Option<String>,
#[arg(long, default_value_t = false)]
f_test: bool,
#[arg(long, default_value_t = 0.01)]
proportion: f64,
#[arg(long, default_value_t = false)]
robust: bool,
#[arg(long, default_value_t = false)]
trend: bool,
#[arg(long)]
treat: Option<f64>,
#[arg(long, default_value = "B")]
sort: String,
#[arg(long)]
number: Option<usize>,
#[arg(long)]
out: Option<PathBuf>,
#[arg(long)]
dump: Option<PathBuf>,
#[arg(long)]
write_fit: Option<PathBuf>,
#[arg(long, default_value = "none")]
write_fit_adjust: String,
#[arg(long, default_value = "separate")]
write_fit_method: String,
#[arg(long)]
decide: Option<PathBuf>,
#[arg(long, default_value = "separate")]
decide_method: String,
#[arg(long, default_value = "BH")]
decide_adjust: String,
#[arg(long, default_value_t = 0.05)]
decide_p: f64,
#[arg(long, default_value_t = 0.0)]
lfc: f64,
#[arg(long, default_value_t = false)]
timing: bool,
#[arg(long)]
timing_json: Option<PathBuf>,
}
fn parse_delimiter(s: &str) -> Result<u8> {
let b = match s {
"," | "comma" => b',',
"\t" | "\\t" | "tab" => b'\t',
";" | "semicolon" => b';',
" " | "space" => b' ',
other => {
let mut chars = other.chars();
match (chars.next(), chars.next()) {
(Some(c), None) if c.is_ascii() => c as u8,
_ => bail!(
"invalid --delimiter {other:?}: expected a single ASCII character or one of \
comma, tab, semicolon, space"
),
}
}
};
Ok(b)
}
fn main() -> Result<()> {
let args = Args::parse();
let delimiter = args.delimiter.as_deref().map(parse_delimiter).transpose()?;
let read = |path: &std::path::Path| match delimiter {
Some(d) => read_matrix_with_delimiter(path, d),
None => read_matrix(path),
};
let expr = read(&args.exprs).context("reading expression matrix")?;
let design_raw = read(&args.design).context("reading design matrix")?;
let design = align_design(&design_raw, &expr.col_names)?;
let coef_names = design_raw.col_names.clone();
let t0 = Instant::now();
let mut fit = lmfit(
&expr.data,
&design,
expr.row_names.clone(),
coef_names.clone(),
)?;
let t_fit = t0.elapsed();
if let Some(cpath) = &args.contrasts {
let craw = read(cpath).context("reading contrast matrix")?;
let cmat = align_contrasts(&craw, &coef_names)?;
fit = contrasts_fit(&fit, &cmat, craw.col_names.clone())?;
}
let treat_active = args.treat.is_some();
let t1 = Instant::now();
if let Some(lfc) = args.treat {
treat(&mut fit, lfc)?;
} else {
ebayes(
&mut fit,
args.proportion,
(0.1, 4.0),
args.trend,
args.robust,
)?;
}
let t_eb = t1.elapsed();
if args.timing {
eprintln!(
"lmFit: {:.3} ms {}: {:.3} ms genes={} coef={}",
t_fit.as_secs_f64() * 1e3,
if treat_active { "treat" } else { "eBayes" },
t_eb.as_secs_f64() * 1e3,
fit.n_genes(),
fit.n_coef()
);
}
if let Some(tj) = &args.timing_json {
let json = format!(
"{{\n \"impl\": \"limma-rust\",\n \"n_genes\": {},\n \"n_samples\": {},\n \"n_coef\": {},\n \"lmfit_s\": {},\n \"ebayes_s\": {},\n \"total_s\": {}\n}}\n",
fit.n_genes(),
expr.data.ncols(),
fit.n_coef(),
t_fit.as_secs_f64(),
t_eb.as_secs_f64(),
t_fit.as_secs_f64() + t_eb.as_secs_f64(),
);
std::fs::write(tj, json).context("writing timing json")?;
}
if let Some(dpath) = &args.dump {
write_fit_dump(dpath, &fit)?;
}
if let Some(wpath) = &args.write_fit {
let opts = WriteFitOptions {
adjust: Adjust::parse(&args.write_fit_adjust)?,
method: DecideMethod::parse(&args.write_fit_method)?,
..Default::default()
};
write_fit(wpath, &fit, None, &opts)?;
}
if let Some(dpath) = &args.decide {
let method = DecideMethod::parse(&args.decide_method)?;
let adjust = Adjust::parse(&args.decide_adjust)?;
let res = decide_tests(&fit, method, adjust, args.decide_p, args.lfc)?;
let counts = res.counts();
eprintln!(
"decideTests ({}, {}, p<{}):",
args.decide_method, args.decide_adjust, args.decide_p
);
for (j, (down, notsig, up)) in counts.iter().enumerate() {
eprintln!(
" {:<16} Down {:>6} NotSig {:>6} Up {:>6}",
res.coef_names[j], down, notsig, up
);
}
write_test_results(dpath, &res)?;
}
let want_f = !treat_active && (args.f_test || (args.coef.is_none() && fit.n_coef() > 1));
if want_f {
let rows = top_table_f(&fit, args.number)?;
match &args.out {
Some(p) => write_top_table_f(p, &rows)?,
None => print_f(&rows),
}
} else {
let coef = resolve_coef(args.coef.as_deref(), &fit.coef_names)?;
let sort_by = SortBy::parse(&args.sort)?;
let rows = if treat_active {
let sb = if sort_by == SortBy::B {
SortBy::PValue
} else {
sort_by
};
top_treat(&fit, coef, sb, args.number)?
} else {
top_table(&fit, coef, sort_by, args.number)?
};
match &args.out {
Some(p) => write_top_table(p, &rows)?,
None => print_t(&rows),
}
}
Ok(())
}
fn resolve_coef(coef: Option<&str>, names: &[String]) -> Result<usize> {
match coef {
None => Ok(names.len() - 1),
Some(s) => {
if let Ok(idx) = s.parse::<usize>() {
if idx < names.len() {
return Ok(idx);
}
bail!("coef index {} out of range (have {})", idx, names.len());
}
names
.iter()
.position(|n| n == s)
.with_context(|| format!("coefficient '{}' not found in {:?}", s, names))
}
}
}
fn print_t(rows: &[limma::TopRow]) {
println!("id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B");
for r in rows.iter().take(20) {
println!(
"{},{:.6},{:.6},{:.6},{:.6},{:.3e},{:.3e},{:.4}",
r.id, r.log2_fold_change, r.lfc_se, r.ave_expr, r.t, r.p_value, r.adj_p_value, r.b
);
}
}
fn print_f(rows: &[limma::TopRowF]) {
println!("id,AveExpr,F,P.Value,adj.P.Val");
for r in rows.iter().take(20) {
println!(
"{},{:.6},{:.6},{:.3e},{:.3e}",
r.id, r.ave_expr, r.f, r.p_value, r.adj_p_value
);
}
}