use std::path::PathBuf;
use clap::Parser;
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_tin::{TinOutput, TinSummary, compute_tin, summarise};
pub const META: ToolMeta = ToolMeta {
name: env!("CARGO_PKG_NAME"),
version: env!("CARGO_PKG_VERSION"),
};
#[derive(Parser, Debug)]
#[command(
name = "rsomics-tin",
version,
about = "Compute Transcript Integrity Number (TIN) for each transcript in a BED12 gene model",
long_about = "Compute TIN for each transcript (or gene) in BED12 format from a sorted, \
indexed BAM file.\n\n\
TIN is an entropy-based score (0–100) measuring coverage uniformity across the mRNA:\n \
TIN = 100 × exp(H) / n where H = Shannon entropy over n sampled mRNA positions\n\n\
Outputs:\n <bam_basename>.tin.xls per-transcript: geneID, chrom, tx_start, tx_end, TIN\n \
<bam_basename>.summary.txt sample-level mean/median/stdev TIN"
)]
pub struct Cli {
#[arg(short = 'i', long = "input")]
pub input_files: String,
#[arg(short = 'r', long = "refgene")]
pub refgene: PathBuf,
#[arg(short = 'c', long = "minCov", default_value = "10")]
pub min_cov: u64,
#[arg(short = 'n', long = "sample-size", default_value = "100")]
pub sample_size: usize,
#[command(flatten)]
pub common: CommonFlags,
}
impl Tool for Cli {
fn meta() -> ToolMeta {
META
}
fn common(&self) -> &CommonFlags {
&self.common
}
fn execute(self) -> Result<()> {
let bam_paths = resolve_bam_inputs(&self.input_files)?;
for bam_path in &bam_paths {
eprintln!("Processing {} ...", bam_path.display());
let result = compute_tin(bam_path, &self.refgene, self.min_cov, self.sample_size)?;
write_output(bam_path, &result)?;
if self.common.json {
let summary = build_summary(&result);
println!(
"{}",
serde_json::to_string_pretty(&summary)
.map_err(|e| RsomicsError::InvalidInput(e.to_string()))?
);
}
}
Ok(())
}
}
fn resolve_bam_inputs(input: &str) -> Result<Vec<PathBuf>> {
use std::path::Path;
let p = Path::new(input);
if p.is_dir() {
let mut bams: Vec<PathBuf> = std::fs::read_dir(p)
.map_err(RsomicsError::Io)?
.filter_map(std::result::Result::ok)
.map(|e| e.path())
.filter(|p| p.extension().and_then(|e| e.to_str()) == Some("bam"))
.collect();
bams.sort();
return Ok(bams);
}
if p.is_file() && !input.contains(',') {
let ext = p
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_ascii_lowercase();
if ext == "bam" {
return Ok(vec![p.to_path_buf()]);
}
let content =
std::fs::read_to_string(p).map_err(|e| RsomicsError::Io(std::io::Error::other(e)))?;
let first_line = content.lines().next().unwrap_or("");
let trimmed = first_line.trim_end();
let looks_like_bam_list =
trimmed.len() >= 4 && trimmed[trimmed.len() - 4..].eq_ignore_ascii_case(".bam");
if looks_like_bam_list {
let paths: Vec<PathBuf> = content
.lines()
.map(str::trim)
.filter(|l| !l.is_empty())
.map(PathBuf::from)
.collect();
return Ok(paths);
}
return Ok(vec![p.to_path_buf()]);
}
Ok(input.split(',').map(|s| PathBuf::from(s.trim())).collect())
}
fn py_float(x: f64) -> String {
let s = format!("{x}");
if s.contains(['.', 'e', 'E']) || !x.is_finite() {
s
} else {
format!("{s}.0")
}
}
fn write_output(bam_path: &std::path::Path, result: &[TinOutput]) -> Result<()> {
use std::io::Write;
let base = bam_path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("out");
let xls_path = format!("{base}.tin.xls");
let mut xls = std::fs::File::create(&xls_path).map_err(|e| {
RsomicsError::Io(std::io::Error::other(format!("creating {xls_path}: {e}")))
})?;
writeln!(xls, "geneID\tchrom\ttx_start\ttx_end\tTIN").map_err(RsomicsError::Io)?;
for row in result {
writeln!(
xls,
"{}\t{}\t{}\t{}\t{}",
row.gene_id,
row.chrom,
row.tx_start,
row.tx_end,
py_float(row.tin)
)
.map_err(RsomicsError::Io)?;
}
let summary = build_summary(result);
let sum_path = format!("{base}.summary.txt");
let mut sf = std::fs::File::create(&sum_path).map_err(|e| {
RsomicsError::Io(std::io::Error::other(format!("creating {sum_path}: {e}")))
})?;
writeln!(sf, "Bam_file\tTIN(mean)\tTIN(median)\tTIN(stdev)").map_err(RsomicsError::Io)?;
writeln!(
sf,
"{}\t{}\t{}\t{}",
bam_path.file_name().and_then(|n| n.to_str()).unwrap_or("?"),
py_float(summary.mean),
py_float(summary.median),
py_float(summary.stdev),
)
.map_err(RsomicsError::Io)?;
Ok(())
}
fn build_summary(result: &[TinOutput]) -> TinSummary {
summarise(result)
}
#[cfg(test)]
mod tests {
use super::*;
use clap::CommandFactory;
#[test]
fn cli_debug_assert() {
Cli::command().debug_assert();
}
}