use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::PathBuf;
use clap::Parser;
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, FlagSpec, HelpSpec, Origin, Section};
use rsomics_bioenv::{read_env, read_matrix, run_bioenv, write_result};
/// Tool identity returned by `Cli::meta()`: the Cargo package name and version,
/// captured at compile time through `env!`.
pub const META: ToolMeta = ToolMeta {
name: env!("CARGO_PKG_NAME"),
version: env!("CARGO_PKG_VERSION"),
};
// Command-line arguments for the tool, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. `///` doc comments on a
// clap-derived struct or its fields are consumed by the derive macro as
// about/help text, so adding them would change the generated command.
//
// `disable_help_flag = true` turns off clap's automatic `-h/--help` flag;
// presumably help output is produced from the `HELP` spec in this file instead
// (TODO confirm against the rsomics_help / rsomics_common runner).
#[derive(Parser, Debug)]
#[command(name = "rsomics-bioenv", version, about, long_about = None, disable_help_flag = true)]
pub struct Cli {
// Positional argument (no `#[arg]` attribute): path handed to `read_matrix`.
// The usage line in `HELP` shows it as `<dm.tsv>`.
dm: PathBuf,
// `-e/--env`: path handed to `read_env`; required because it is not an
// `Option` and has no default.
#[arg(short = 'e', long)]
env: PathBuf,
// `-c/--columns`: optional comma-separated list (split by `value_delimiter`).
// When absent, `None` is forwarded to `run_bioenv`.
#[arg(short = 'c', long, value_delimiter = ',')]
columns: Option<Vec<String>>,
// `-o/--output`: destination path; the literal "-" (the default) selects
// stdout in `execute`.
#[arg(short = 'o', long, default_value = "-")]
output: String,
// Flags shared across tools, flattened into this command's argument list.
#[command(flatten)]
pub common: CommonFlags,
}
impl Tool for Cli {
fn meta() -> ToolMeta {
META
}
fn common(&self) -> &CommonFlags {
&self.common
}
fn execute(self) -> Result<()> {
self.common.install_rayon_pool()?;
let dm_src = self.dm.display().to_string();
let env_src = self.env.display().to_string();
let dm = read_matrix(open(&self.dm)?, &dm_src)?;
let env = read_env(open(&self.env)?, &env_src)?;
let best = run_bioenv(&dm, &env, self.columns.as_deref(), &env_src)?;
let mut out: Box<dyn Write> = if self.output == "-" {
Box::new(BufWriter::new(std::io::stdout().lock()))
} else {
Box::new(BufWriter::new(
File::create(&self.output).map_err(RsomicsError::Io)?,
))
};
write_result(&mut out, &best)?;
out.flush().map_err(RsomicsError::Io)?;
if !self.common.quiet
&& let Some(top) = best.iter().max_by(|a, b| {
a.correlation
.partial_cmp(&b.correlation)
.unwrap_or(std::cmp::Ordering::Equal)
})
{
eprintln!(
"best overall: rho={:.6} at size {} ({})",
top.correlation,
top.size,
top.vars.join(", ")
);
}
Ok(())
}
}
/// Opens `path` for buffered reading.
///
/// # Errors
///
/// Returns `RsomicsError::InvalidInput` whose message is the path followed by
/// the underlying I/O error when the file cannot be opened.
fn open(path: &std::path::Path) -> Result<BufReader<File>> {
    match File::open(path) {
        Ok(file) => Ok(BufReader::new(file)),
        Err(e) => Err(RsomicsError::InvalidInput(format!(
            "{}: {e}",
            path.display()
        ))),
    }
}
/// Static help specification for this tool: tagline, upstream provenance, usage
/// line, documented flags, and worked examples.
///
/// The flag entries mirror the `-e`, `-c` and `-o` arguments declared on `Cli`;
/// the two must be kept in sync by hand. How this spec is rendered is decided
/// by `rsomics_help`, which is not visible from this file.
pub static HELP: HelpSpec = HelpSpec {
name: env!("CARGO_PKG_NAME"),
version: env!("CARGO_PKG_VERSION"),
tagline: "BIO-ENV / BEST: the env-variable subset best correlated with community distances.",
// Provenance: the upstream implementation this tool is modelled on, the
// licences involved, and the DOI of the referenced paper.
origin: Some(Origin {
upstream: "scikit-bio skbio.stats.distance.bioenv",
upstream_license: "BSD-3-Clause",
our_license: "MIT OR Apache-2.0",
paper_doi: Some("10.3354/meps092205"),
}),
// The positional matrix path appears only here; it has no FlagSpec entry.
usage_lines: &["<dm.tsv> --env <env.tsv> [-c ph,depth] [-o out.tsv]"],
sections: &[Section {
title: "OPTIONS",
flags: &[
// -e/--env: the only flag marked required.
FlagSpec {
short: Some('e'),
long: "env",
aliases: &[],
value: Some("<path>"),
type_hint: Some("Path"),
required: true,
default: None,
description: "Environmental variables TSV (samples × numeric vars).",
why_default: None,
},
// -c/--columns: optional comma-separated column subset.
FlagSpec {
short: Some('c'),
long: "columns",
aliases: &[],
value: Some("<a,b,…>"),
type_hint: Some("List"),
required: false,
default: None,
description: "Comma-separated subset of env columns to consider; all if unset.",
why_default: None,
},
// -o/--output: default "-" matches the clap `default_value` on `Cli::output`.
FlagSpec {
short: Some('o'),
long: "output",
aliases: &[],
value: Some("<path>"),
type_hint: Some("String"),
required: false,
default: Some("-"),
description: "Output path (- for stdout).",
why_default: None,
},
],
}],
examples: &[
Example {
description: "All env variables against a community distance matrix",
command: "rsomics-bioenv dm.tsv --env env.tsv -o best.tsv",
},
Example {
description: "Restrict to three named variables",
command: "rsomics-bioenv dm.tsv -e env.tsv -c pH,depth,salinity",
},
],
// No JSON result schema is documented for this tool.
json_result_schema_doc: None,
};
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Builds the derived clap command and runs clap's own consistency checks,
    /// which panic on a misconfigured argument definition.
    #[test]
    fn cli_debug_assert() {
        let command = <Cli as CommandFactory>::command();
        command.debug_assert();
    }
}