use clap::{builder::ArgPredicate, Parser};
use parse_size::parse_size;
use serde::Serialize;
use std::fmt;
use std::path::PathBuf;
use std::str::FromStr;
use tracing::info;
/// Filter-group choices that can be selected through the `filter_group`
/// argument of `Args`.
///
/// Derives `clap::ValueEnum`, so each variant is exposed as a possible
/// command-line value, and `Serialize` so the choice can be written out
/// together with the rest of the parsed options.
#[derive(Clone, Debug, clap::ValueEnum, Serialize)]
pub enum FilterGroup {
// NOTE(review): judging by the name, selects no filtering — confirm where
// this value is consumed. (Plain `//` is used on variants because `///`
// would become clap help text for the possible value.)
NoFilters,
// NOTE(review): presumably a NanoCount-like set of filter values — confirm
// against the code that consumes this variant.
NanocountFilters,
}
/// Converts a command-line strand specifier into a `bio_types::strand::Strand`.
///
/// Spellings are matched exactly (case-sensitively):
/// * `+`, `fw`, `FW`, `f`, `F` map to the forward strand,
/// * `-`, `rc`, `RC`, `r`, `R` map to the reverse strand,
/// * `.`, `both`, `either` map to the unknown strand.
///
/// # Errors
///
/// Returns an error that names the offending argument when it is none of the
/// spellings listed above.
fn parse_strand(arg: &str) -> anyhow::Result<bio_types::strand::Strand> {
    use bio_types::strand::Strand;

    let strand = match arg {
        "+" | "fw" | "FW" | "f" | "F" => Strand::Forward,
        "-" | "rc" | "RC" | "r" | "R" => Strand::Reverse,
        "." | "both" | "either" => Strand::Unknown,
        unrecognized => anyhow::bail!("Cannot parse {} as a valid strand type", unrecognized),
    };
    Ok(strand)
}
/// Output mode for read assignment probabilities, as selected by the
/// `write_assignment_probs` argument of `Args`.
///
/// The command line reaches these variants through
/// `parse_assign_prob_out_value`, which accepts `raw`/`uncompressed` for
/// [`ReadAssignmentProbOut::Uncompressed`] and `compressed`/`lz4` for
/// [`ReadAssignmentProbOut::Compressed`].
#[derive(Debug, Clone, clap::ValueEnum, Serialize)]
pub enum ReadAssignmentProbOut {
// Selected by the spellings `raw` and `uncompressed`.
Uncompressed,
// Selected by the spellings `compressed` and `lz4`.
Compressed,
}
/// Parses the value given to the read-assignment-probability output option.
///
/// Matching is case-insensitive: the input is lower-cased first. `raw` and
/// `uncompressed` select [`ReadAssignmentProbOut::Uncompressed`]; `compressed`
/// and `lz4` select [`ReadAssignmentProbOut::Compressed`].
///
/// # Errors
///
/// Returns an error for any other value. The message contains the lower-cased
/// form of the input, not the original spelling.
fn parse_assign_prob_out_value(s: &str) -> anyhow::Result<ReadAssignmentProbOut> {
    let normalized = s.to_lowercase();
    let choice = match normalized.as_str() {
        "raw" | "uncompressed" => ReadAssignmentProbOut::Uncompressed,
        "compressed" | "lz4" => ReadAssignmentProbOut::Compressed,
        unknown => anyhow::bail!(
            "Cannot parse {} as a valid option for read assignment probability output",
            unknown
        ),
    };
    Ok(choice)
}
/// Sequencing technology of the input reads, as selected by the `seq_tech`
/// argument of `Args`.
///
/// Derives `clap::ValueEnum` and additionally implements `FromStr`, which
/// accepts a set of short aliases (see that impl for the exact spellings).
#[derive(Debug, Clone, clap::ValueEnum, Serialize)]
pub enum SequencingTech {
// `FromStr` spellings: `ont`, `ont-cdna`.
OntCDNA,
// `FromStr` spelling: `ont-drna`.
OntDRNA,
// `FromStr` spellings: `pb`, `pacbio`.
PacBio,
// `FromStr` spellings: `pb-hifi`, `pacbio-hifi`.
PacBioHifi,
}
impl FromStr for SequencingTech {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"ont" => Ok(SequencingTech::OntCDNA),
"ont-cdna" => Ok(SequencingTech::OntCDNA),
"ont-drna" => Ok(SequencingTech::OntDRNA),
"pb" => Ok(SequencingTech::PacBio),
"pacbio" => Ok(SequencingTech::PacBio),
"pb-hifi" => Ok(SequencingTech::PacBioHifi),
"pacbio-hifi" => Ok(SequencingTech::PacBioHifi),
x => Err(format!("Unknown protocol type {:}", x)),
}
}
}
/// A filter value that remembers whether it is a built-in default or a value
/// supplied explicitly.
///
/// Each numeric type (`i64`, `u32`, `f32`) has a `Default*` and a `Provided*`
/// variant. The `parse_filter_*` helpers produce a `Default*` variant when the
/// textual value starts with `DEFAULT_FILTER_PREFIX` and a `Provided*` variant
/// otherwise; the `Display` impl writes that prefix back for `Default*`
/// variants.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum FilterArg {
/// An `i64` value that carried the default marker.
DefaultI64(i64),
/// An `i64` value without the default marker.
ProvidedI64(i64),
/// A `u32` value that carried the default marker.
DefaultU32(u32),
/// A `u32` value without the default marker.
ProvidedU32(u32),
/// An `f32` value that carried the default marker.
DefaultF32(f32),
/// An `f32` value without the default marker.
ProvidedF32(f32),
}
/// Marker placed in front of the textual form of a default filter value.
///
/// `Display for FilterArg` emits it for the `Default*` variants, and the
/// `parse_filter_*` helpers strip it to tell defaults apart from values
/// without the marker.
const DEFAULT_FILTER_PREFIX: &str = "*";
impl fmt::Display for FilterArg {
    /// Writes the wrapped number, preceded by `DEFAULT_FILTER_PREFIX` when the
    /// value is one of the `Default*` variants.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let is_default = matches!(
            self,
            Self::DefaultI64(_) | Self::DefaultU32(_) | Self::DefaultF32(_)
        );
        if is_default {
            // The marker is what lets the textual form round-trip back into a
            // `Default*` variant when it is parsed again.
            f.write_str(DEFAULT_FILTER_PREFIX)?;
        }
        match self {
            Self::DefaultI64(value) | Self::ProvidedI64(value) => write!(f, "{}", value),
            Self::DefaultU32(value) | Self::ProvidedU32(value) => write!(f, "{}", value),
            Self::DefaultF32(value) | Self::ProvidedF32(value) => write!(f, "{}", value),
        }
    }
}
impl FilterArg {
    /// Returns the wrapped `i64`, whether it is a default or a provided value.
    ///
    /// # Errors
    ///
    /// Fails when `self` holds a `u32` or `f32` variant.
    pub fn try_as_i64(&self) -> anyhow::Result<i64> {
        if let Self::DefaultI64(value) | Self::ProvidedI64(value) = self {
            return Ok(*value);
        }
        anyhow::bail!("Could not provide FilterArg variant as an i64")
    }

    /// Returns the wrapped `u32`, whether it is a default or a provided value.
    ///
    /// # Errors
    ///
    /// Fails when `self` holds an `i64` or `f32` variant.
    pub fn try_as_u32(&self) -> anyhow::Result<u32> {
        if let Self::DefaultU32(value) | Self::ProvidedU32(value) = self {
            return Ok(*value);
        }
        anyhow::bail!("Could not provide FilterArg variant as a u32")
    }

    /// Returns the wrapped `f32`, whether it is a default or a provided value.
    ///
    /// # Errors
    ///
    /// Fails when `self` holds an `i64` or `u32` variant.
    pub fn try_as_f32(&self) -> anyhow::Result<f32> {
        if let Self::DefaultF32(value) | Self::ProvidedF32(value) = self {
            return Ok(*value);
        }
        anyhow::bail!("Could not provide FilterArg variant as an f32")
    }

    /// Returns the wrapped value if `self` is `ProvidedU32`, logging `msg`
    /// followed by that value at info level; every other variant (including
    /// `DefaultU32`) yields `other` without logging.
    pub fn provided_or_u32(&self, msg: &str, other: u32) -> u32 {
        if let Self::ProvidedU32(value) = self {
            info!("{} {}", msg, value);
            *value
        } else {
            other
        }
    }

    /// Returns the wrapped value if `self` is `ProvidedI64`, logging `msg`
    /// followed by that value at info level; every other variant (including
    /// `DefaultI64`) yields `other` without logging.
    pub fn provided_or_i64(&self, msg: &str, other: i64) -> i64 {
        if let Self::ProvidedI64(value) = self {
            info!("{} {}", msg, value);
            *value
        } else {
            other
        }
    }

    /// Returns the wrapped value if `self` is `ProvidedF32`, logging `msg`
    /// followed by that value at info level; every other variant (including
    /// `DefaultF32`) yields `other` without logging.
    pub fn provided_or_f32(&self, msg: &str, other: f32) -> f32 {
        if let Self::ProvidedF32(value) = self {
            info!("{} {}", msg, value);
            *value
        } else {
            other
        }
    }
}
/// Parses an `i64` filter value.
///
/// Input that starts with `DEFAULT_FILTER_PREFIX` has the prefix removed and
/// becomes [`FilterArg::DefaultI64`]; anything else becomes
/// [`FilterArg::ProvidedI64`].
///
/// # Errors
///
/// Fails when the numeric part is not a valid `i64`.
fn parse_filter_i64(arg: &str) -> anyhow::Result<FilterArg> {
    let parsed = match arg.strip_prefix(DEFAULT_FILTER_PREFIX) {
        Some(digits) => FilterArg::DefaultI64(digits.parse::<i64>()?),
        None => FilterArg::ProvidedI64(arg.parse::<i64>()?),
    };
    Ok(parsed)
}
/// Parses a `u32` filter value.
///
/// Input that starts with `DEFAULT_FILTER_PREFIX` has the prefix removed and
/// becomes [`FilterArg::DefaultU32`]; anything else becomes
/// [`FilterArg::ProvidedU32`].
///
/// # Errors
///
/// Fails when the numeric part is not a valid `u32`.
fn parse_filter_u32(arg: &str) -> anyhow::Result<FilterArg> {
    let parsed = match arg.strip_prefix(DEFAULT_FILTER_PREFIX) {
        Some(digits) => FilterArg::DefaultU32(digits.parse::<u32>()?),
        None => FilterArg::ProvidedU32(arg.parse::<u32>()?),
    };
    Ok(parsed)
}
/// Parses an `f32` filter value.
///
/// Input that starts with `DEFAULT_FILTER_PREFIX` has the prefix removed and
/// becomes [`FilterArg::DefaultF32`]; anything else becomes
/// [`FilterArg::ProvidedF32`].
///
/// # Errors
///
/// Fails when the numeric part is not a valid `f32`.
fn parse_filter_f32(arg: &str) -> anyhow::Result<FilterArg> {
    let parsed = match arg.strip_prefix(DEFAULT_FILTER_PREFIX) {
        Some(digits) => FilterArg::DefaultF32(digits.parse::<f32>()?),
        None => FilterArg::ProvidedF32(arg.parse::<f32>()?),
    };
    Ok(parsed)
}
// Command-line options, parsed through clap's derive API and serializable
// with serde.
//
// Plain `//` comments are used deliberately throughout this struct: `///` doc
// comments on a `Parser` struct or its fields are picked up by clap as help
// text, which would change the generated `--help` output.
//
// The required `input` group means that one of `alignments` or `reads` has to
// be supplied.
#[derive(Parser, Debug, Serialize)]
#[command(author, version, about, long_about = None)]
#[command(group(
    clap::ArgGroup::new("input")
        .required(true)
        .args(["alignments", "reads"])
))]
pub struct Args {
    // Mutually exclusive with `verbose`.
    #[arg(long, conflicts_with = "verbose")]
    pub quiet: bool,

    #[arg(long)]
    pub verbose: bool,

    // Alignment-mode input; member of the required `input` group.
    #[arg(short, long, help_heading = "alignment mode")]
    pub alignments: Option<PathBuf>,

    // Raw-read-mode input (comma-separated list of paths); member of the
    // required `input` group. When present, `reference` and `seq_tech` must
    // be given as well.
    #[arg(
        long,
        help_heading = "raw read mode",
        value_delimiter = ',',
        requires_ifs([
            (ArgPredicate::IsPresent, "reference"),
            (ArgPredicate::IsPresent, "seq_tech")
        ])
    )]
    pub reads: Option<Vec<PathBuf>>,

    // Only valid in raw read mode (conflicts with `alignments`).
    #[arg(long, conflicts_with = "alignments", help_heading = "raw read mode")]
    pub reference: Option<PathBuf>,

    // Only valid in raw read mode (conflicts with `alignments`).
    #[arg(long, conflicts_with = "alignments", help_heading = "raw read mode")]
    pub index_out: Option<PathBuf>,

    // Required whenever `alignments` is absent.
    // NOTE(review): `SequencingTech` derives `ValueEnum` and also implements
    // `FromStr`; `value_parser!` appears to prefer the `ValueEnum` impl, so the
    // `FromStr` aliases may not be reachable from the command line — confirm.
    #[arg(
        long,
        help_heading = "raw read mode",
        required_unless_present = "alignments",
        value_parser = clap::value_parser!(SequencingTech)
    )]
    pub seq_tech: Option<SequencingTech>,

    // May only be given explicitly together with `reads`.
    #[arg(
        long,
        default_value_t = 100,
        requires = "reads",
        help_heading = "raw read mode"
    )]
    pub best_n: usize,

    // Human-readable size (e.g. the default "1GB"), converted to a byte count
    // by `parse_size`. Only valid in raw read mode.
    #[arg(
        long,
        conflicts_with = "alignments",
        help_heading = "raw read mode",
        default_value = "1GB",
        value_parser = |s: &str| parse_size(s)
    )]
    pub thread_buff_size: u64,

    #[arg(short, long, required = true)]
    pub output: PathBuf,

    #[arg(long, help_heading = "filters", value_enum)]
    pub filter_group: Option<FilterGroup>,

    // The filter options below use `FilterArg` so that downstream code can
    // tell a built-in default (`Default*`) from a value typed by the user
    // (`Provided*`). clap renders the default through `Display` (with the
    // default marker) and feeds it back through the matching `parse_filter_*`.
    #[arg(short, long, help_heading="filters", default_value_t = FilterArg::DefaultI64(u32::MAX as i64), value_parser = parse_filter_i64)]
    pub three_prime_clip: FilterArg,

    #[arg(short, long, help_heading="filters", default_value_t = FilterArg::DefaultU32(u32::MAX), value_parser = parse_filter_u32)]
    pub five_prime_clip: FilterArg,

    #[arg(short, long, help_heading = "filters", default_value_t = FilterArg::DefaultF32(0.95), value_parser = parse_filter_f32)]
    pub score_threshold: FilterArg,

    #[arg(short, long, help_heading = "filters", default_value_t = FilterArg::DefaultF32(0.5), value_parser = parse_filter_f32)]
    pub min_aligned_fraction: FilterArg,

    #[arg(short = 'l', long, help_heading = "filters", default_value_t = FilterArg::DefaultU32(50), value_parser = parse_filter_u32)]
    pub min_aligned_len: FilterArg,

    // Parsed by `parse_strand`; defaults to the unknown strand.
    #[arg(
        short = 'd',
        long,
        help_heading = "filters",
        default_value_t = bio_types::strand::Strand::Unknown,
        value_parser = parse_strand
    )]
    pub strand_filter: bio_types::strand::Strand,

    // Not available in raw read mode (conflicts with `reads`).
    #[arg(long, conflicts_with = "reads")]
    pub single_cell: bool,

    #[arg(long, help_heading = "coverage model", value_parser)]
    pub model_coverage: bool,

    #[arg(
        short = 'k',
        long,
        help_heading = "coverage model",
        value_parser,
        default_value_t = 2.0
    )]
    pub growth_rate: f64,

    // Optional-value flag: a bare `--write-assignment-probs` means
    // "uncompressed"; an explicit value must be attached with `=`.
    //
    // The conflict must reference the argument id `single_cell`: clap's derive
    // uses the field name verbatim as the id (as with "seq_tech" above), so the
    // kebab-case spelling names no argument and the conflict would not be
    // enforced.
    #[arg(
        long,
        help_heading = "output read-txps probabilities",
        conflicts_with = "single_cell",
        default_missing_value = "uncompressed",
        num_args = 0..=1,
        require_equals = true,
        value_parser = parse_assign_prob_out_value
    )]
    pub write_assignment_probs: Option<ReadAssignmentProbOut>,

    #[arg(long, help_heading = "EM", default_value_t = 1000)]
    pub max_em_iter: u32,

    #[arg(long, help_heading = "EM", default_value_t = 1e-3)]
    pub convergence_thresh: f64,

    #[arg(short = 'j', long, default_value_t = 3)]
    pub threads: usize,

    #[arg(short = 'q', long, help_heading = "EM")]
    pub short_quant: Option<String>,

    #[arg(long, default_value_t = 0)]
    pub num_bootstraps: u32,

    #[arg(short, long, help_heading = "coverage model", default_value_t = 100)]
    pub bin_width: u32,

    // Hidden from `--help`.
    #[arg(long, hide = true, default_value_t = 100_000)]
    pub sort_check_num: usize,

    // Hidden from `--help`.
    #[arg(short, long, hide = true)]
    pub use_kde: bool,
}