use std::fmt;
use super::QualFilter;
use clap::{ArgGroup, Parser, Subcommand, ValueEnum};
use std::path::PathBuf;
/// Default k-mer length for the `-k` option of the `build` and `cov` subcommands.
pub const DEFAULT_KMER: usize = 31;
/// Default read proportion (`None`).
// NOTE(review): not referenced in this file; presumably mirrors the absent
// default of `build --proportion-reads` — confirm against callers.
pub const DEFAULT_PROPORTION_READS: Option<f64> = None;
/// Default for the `--single-strand` flag of `build` and `cov` (off).
pub const DEFAULT_STRAND: bool = false;
/// Default for `--min-freq` in `align` and `weed` (`distance` uses `0.0` instead).
pub const DEFAULT_MINFREQ: f64 = 0.9;
/// Default for the `--filter-ambig-as-missing` flag of `align` and `weed` (off).
pub const DEFAULT_AMBIGMISSING: bool = false;
/// Default for the `--repeat-mask` flag of `map` (off).
pub const DEFAULT_REPEATMASK: bool = false;
/// Default for the `--ambig-mask` flag of `align`, `map` and `weed` (off).
pub const DEFAULT_AMBIGMASK: bool = false;
/// Default for the `--no-gap-only-sites` flag of `align` and `weed` (off).
pub const DEFAULT_CONSTGAPS: bool = false;
/// Default minimum k-mer count.
// NOTE(review): not referenced in this file; presumably the fallback used when
// `build --min-count` is not given — confirm against callers.
pub const DEFAULT_MINCOUNT: u16 = 5;
/// Default for `build --min-qual`.
pub const DEFAULT_MINQUAL: u8 = 20;
/// Default for `build --qual-filter`.
pub const DEFAULT_QUALFILTER: QualFilter = QualFilter::Strict;
/// Default for `lo --missing`.
pub const DEFAULT_MISSING_SKALO: f32 = 0.1;
/// Default for `lo --depth`.
pub const DEFAULT_MAX_PATHDEPTH: usize = 4;
/// Default for `lo --indel-kmers`.
pub const DEFAULT_MAX_INDEL_KMERS: usize = 2;
/// Value parser for a k-mer length given on the command line.
///
/// Succeeds only for odd integers in the inclusive range 5 to 63.
///
/// # Errors
/// Returns a message when `s` is not an unsigned integer, or when the number
/// is even or outside the accepted range.
#[doc(hidden)]
fn valid_kmer(s: &str) -> Result<usize, String> {
    match s.parse::<usize>() {
        Err(_) => Err(format!("`{s}` isn't a valid k-mer")),
        Ok(len) if (5..=63).contains(&len) && !len.is_multiple_of(2) => Ok(len),
        Ok(_) => Err("K-mer must be an odd number between 5 and 63 (inclusive)".to_string()),
    }
}
/// Value parser for a proportion given on the command line.
///
/// Succeeds for any floating point number in the inclusive range 0 to 1.
///
/// # Errors
/// Returns a message when `s` is not a floating point number, or when the
/// number lies outside 0 to 1.
#[doc(hidden)]
fn valid_proportion(s: &str) -> Result<f64, String> {
    let p: f64 = s
        .parse()
        .map_err(|_| format!("`{s}` isn't a valid proportion"))?;
    if (0.0..=1.0).contains(&p) {
        Ok(p)
    } else {
        // `NaN` parses successfully but never satisfies the range check, so it
        // is rejected here as well.
        Err("Proportion must be between 0 and 1 (inclusive)".to_string())
    }
}
/// Value parser for a frequency given on the command line.
///
/// Succeeds for any floating point number in the inclusive range 0 to 1.
///
/// # Errors
/// Returns a message when `s` is not a floating point number, or when the
/// number lies outside 0 to 1.
#[doc(hidden)]
fn zero_to_one(s: &str) -> Result<f64, String> {
    let Ok(freq) = s.parse::<f64>() else {
        return Err(format!("`{s}` isn't a valid frequency"));
    };
    if (0.0..=1.0).contains(&freq) {
        return Ok(freq);
    }
    Err("Frequency must be between 0 and 1 (inclusive)".to_string())
}
/// Value parser for a thread count given on the command line.
///
/// Succeeds for any unsigned integer of at least one.
///
/// # Errors
/// Returns a message when `s` is not an unsigned integer, or when it is zero.
#[doc(hidden)]
fn valid_cpus(s: &str) -> Result<usize, String> {
    match s.parse::<usize>() {
        Ok(0) => Err("Threads must be one or higher".to_string()),
        Ok(count) => Ok(count),
        Err(_) => Err(format!("`{s}` isn't a valid number of cores")),
    }
}
/// Logs a warning when `threads` exceeds the count reported by
/// `num_cpus::get()`.
///
/// The requested value is only reported on; it is neither changed nor
/// rejected.
pub fn check_threads(threads: usize) {
    let max_threads = num_cpus::get();
    if threads <= max_threads {
        return;
    }
    log::warn!("{threads} threads is greater than available cores {max_threads}");
}
/// Value parser for a minimum k-mer count given on the command line.
///
/// The literal `auto` yields [`ValidMinKmer::Auto`]; any integer of at least
/// one that fits in a `u16` yields [`ValidMinKmer::Val`] and is logged at
/// info level.
///
/// # Errors
/// Returns a message when `s` is neither `auto` nor a `u16`, or when the
/// number is zero.
#[doc(hidden)]
pub fn valid_min_kmer(s: &str) -> Result<ValidMinKmer, String> {
    if s == "auto" {
        return Ok(ValidMinKmer::Auto);
    }
    // Malformed input comes from the user, so report it as a parser error
    // instead of panicking.
    let x: u16 = s.parse().map_err(|_| {
        format!("`{s}` isn't a valid minimum kmer count (expected `auto` or an integer >= 1)")
    })?;
    if x >= 1 {
        log::info!("Using provided minimum kmer count of {x}");
        Ok(ValidMinKmer::Val(x))
    } else {
        Err("Minimum kmer count must be >= 1".to_string())
    }
}
/// Minimum k-mer count setting as produced by [`valid_min_kmer`].
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum ValidMinKmer {
/// The user passed the literal `auto`.
// NOTE(review): how the automatic value is chosen is not visible in this file.
Auto,
/// An explicit count; [`valid_min_kmer`] only constructs this with a value >= 1.
Val(u16),
}
// Value of the `map` subcommand's `-f`/`--format` option (default `Aln`).
// clap's `ValueEnum` derive exposes the variants as `vcf` and `aln`.
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change `--help` output.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum FileType {
// NOTE(review): presumably VCF output — confirm against the code that consumes it.
Vcf,
// NOTE(review): presumably alignment output — confirm against the code that consumes it.
Aln,
}
// Value of the `--filter` option of the `align` and `weed` subcommands.
// clap's `ValueEnum` derive exposes the variants in kebab-case (`no-filter`,
// `no-const`, `no-ambig`, `no-ambig-or-const`). The per-variant descriptions
// below repeat the labels of the `Display` impl for this type.
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change `--help` output.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum FilterType {
// "No filtering" (default for `weed`).
NoFilter,
// "No constant sites" (default for `align`).
NoConst,
// "No ambiguous sites".
NoAmbig,
// "No constant sites or ambiguous bases".
NoAmbigOrConst,
}
/// Writes a short human-readable label for each filter mode.
impl fmt::Display for FilterType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the label first so there is a single write call.
        let label = match self {
            Self::NoFilter => "No filtering",
            Self::NoConst => "No constant sites",
            Self::NoAmbig => "No ambiguous sites",
            Self::NoAmbigOrConst => "No constant sites or ambiguous bases",
        };
        f.write_str(label)
    }
}
// Top-level command line definition, parsed with clap's derive API.
// `author`, `version` and `about` are given without values, so clap fills them
// from the crate metadata; `propagate_version` makes subcommands report the
// version too.
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change `--help` output.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(propagate_version = true)]
pub struct Args {
// The selected subcommand together with its options.
#[doc(hidden)]
#[command(subcommand)]
pub command: Commands,
// `-v` / `--verbose`; `global = true` lets it be given after the subcommand as well.
#[arg(short, long, global = true)]
pub verbose: bool,
}
// Subcommands of the CLI. clap derives each subcommand name from the variant
// name in kebab-case (`Build` -> `build`, `Nk` -> `nk`, ...). Fields without
// `short`/`long` are positional; a bare `short` uses the first letter of the
// field name and a bare `long` uses the field name in kebab-case.
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change `--help` output.
#[derive(Subcommand)]
pub enum Commands {
// `build`: exactly one of the positional `seq_files` or `-f <file_list>` must
// be given (required "input" group; group members are mutually exclusive by
// default).
#[command(group(
ArgGroup::new("input")
.required(true)
.args(["seq_files", "file_list"]),
))]
Build {
// Positional list of input files.
#[arg(group = "input")]
seq_files: Option<Vec<String>>,
// `-f`: alternative to `seq_files`.
// NOTE(review): presumably a file listing the inputs — format not visible here.
#[arg(short, group = "input")]
file_list: Option<String>,
// `-o`: required, since it is a plain `String` with no default.
#[arg(short)]
output: String,
// `-k`: odd k-mer length between 5 and 63, checked by `valid_kmer`.
#[arg(short, value_parser = valid_kmer, default_value_t = DEFAULT_KMER)]
k: usize,
// `--proportion-reads`: optional value in 0..=1, checked by `valid_proportion`.
#[arg(long, value_parser = valid_proportion)]
proportion_reads: Option<f64>,
// `--single-strand` flag.
#[arg(long, default_value_t = DEFAULT_STRAND)]
single_strand: bool,
// `--min-count`: optional; `auto` or an integer >= 1, parsed by `valid_min_kmer`.
#[arg(long, value_parser = valid_min_kmer)]
min_count: Option<ValidMinKmer>,
// `--min-qual`.
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
min_qual: u8,
// `--qual-filter`: one of the `QualFilter` values.
#[arg(long, value_enum, default_value_t = DEFAULT_QUALFILTER)]
qual_filter: QualFilter,
// `--threads`: at least 1, checked by `valid_cpus`.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `align`.
Align {
// One or more positional inputs (required).
#[arg(required = true)]
input: Vec<String>,
// `-o`: optional output.
// NOTE(review): behavior when omitted is decided by the caller, not shown here.
#[arg(short)]
output: Option<String>,
// `-m` / `--min-freq`: value in 0..=1, checked by `zero_to_one`.
#[arg(short, long, value_parser = zero_to_one, default_value_t = DEFAULT_MINFREQ)]
min_freq: f64,
// `--filter-ambig-as-missing` flag.
#[arg(long, default_value_t = DEFAULT_AMBIGMISSING)]
filter_ambig_as_missing: bool,
// `--filter`: one of the `FilterType` values; defaults to `no-const` here.
#[arg(long, value_enum, default_value_t = FilterType::NoConst)]
filter: FilterType,
// `--ambig-mask` flag.
#[arg(long, default_value_t = DEFAULT_AMBIGMASK)]
ambig_mask: bool,
// `--no-gap-only-sites` flag.
#[arg(long, default_value_t = DEFAULT_CONSTGAPS)]
no_gap_only_sites: bool,
// `--threads`: at least 1, checked by `valid_cpus`.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `map`.
Map {
// First positional argument.
reference: String,
// Remaining positional arguments.
input: Vec<String>,
// `-o`: optional output.
#[arg(short)]
output: Option<String>,
// `-f` / `--format`: one of the `FileType` values; defaults to `aln`.
#[arg(short, long, value_enum, default_value_t = FileType::Aln)]
format: FileType,
// `--ambig-mask` flag.
#[arg(long, default_value_t = DEFAULT_AMBIGMASK)]
ambig_mask: bool,
// `--repeat-mask` flag.
#[arg(long, default_value_t = DEFAULT_REPEATMASK)]
repeat_mask: bool,
// `--threads`: at least 1, checked by `valid_cpus`.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `distance`.
Distance {
// Positional input file.
skf_file: String,
// `-o`: optional output.
#[arg(short)]
output: Option<String>,
// `-m` / `--min-freq`: value in 0..=1; defaults to 0.0 here rather than `DEFAULT_MINFREQ`.
#[arg(short, long, value_parser = zero_to_one, default_value_t = 0.0)]
min_freq: f64,
// `--allow-ambiguous` flag.
#[arg(long, default_value_t = false)]
allow_ambiguous: bool,
// `--threads`: at least 1, checked by `valid_cpus`.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `merge`.
Merge {
// Positional list of input files.
skf_files: Vec<String>,
// `-o`: required output.
#[arg(short)]
output: String,
},
// `delete`: exactly one of the positional `names` or `-f <file_list>` must be
// given (required "input" group).
#[command(group(
ArgGroup::new("input")
.required(true)
.args(["names", "file_list"]),
))]
Delete {
// `-s` / `--skf-file`: required option (not positional, unlike the other subcommands).
#[arg(short, long, required = true)]
skf_file: String,
// `-o`: optional output.
#[arg(short)]
output: Option<String>,
// `-f`: alternative to `names`.
// NOTE(review): presumably a file listing the names — format not visible here.
#[arg(short, group = "input")]
file_list: Option<String>,
// Positional list of names.
#[arg(group = "input")]
names: Option<Vec<String>>,
},
// `weed`.
Weed {
// First positional argument.
skf_file: String,
// Optional second positional argument.
weed_file: Option<String>,
// `-o`: optional output.
#[arg(short)]
output: Option<String>,
// `--reverse` flag.
#[arg(long, default_value_t = false)]
reverse: bool,
// `-m` / `--min-freq`: value in 0..=1, checked by `zero_to_one`.
#[arg(short, long, value_parser = zero_to_one, default_value_t = DEFAULT_MINFREQ)]
min_freq: f64,
// `--filter-ambig-as-missing` flag.
#[arg(long, default_value_t = DEFAULT_AMBIGMISSING)]
filter_ambig_as_missing: bool,
// `--filter`: one of the `FilterType` values; defaults to `no-filter` here.
#[arg(long, value_enum, default_value_t = FilterType::NoFilter)]
filter: FilterType,
// `--ambig-mask` flag.
#[arg(long, default_value_t = DEFAULT_AMBIGMASK)]
ambig_mask: bool,
// `--no-gap-only-sites` flag.
#[arg(long, default_value_t = DEFAULT_CONSTGAPS)]
no_gap_only_sites: bool,
},
// `nk`.
Nk {
// Positional input file.
skf_file: String,
// `--full-info` flag.
#[arg(long, default_value_t = false)]
full_info: bool,
},
// `cov`.
Cov {
// First positional argument.
fastq_fwd: String,
// Second positional argument.
fastq_rev: String,
// `-k`: odd k-mer length between 5 and 63, checked by `valid_kmer`.
#[arg(short, value_parser = valid_kmer, default_value_t = DEFAULT_KMER)]
k: usize,
// `--single-strand` flag.
#[arg(long, default_value_t = DEFAULT_STRAND)]
single_strand: bool,
},
// `lo`: options are grouped under the help headings "input", "output",
// "graph traversal" and "other".
Lo {
// First positional argument.
input_skf: String,
// Second positional argument.
output: String,
// `-r` / `--reference`: optional path.
#[arg(short = 'r', long, help_heading = "input")]
reference: Option<PathBuf>,
// `-m` / `--missing`: no range validation is applied at parse time.
#[arg(short = 'm', long, default_value_t = DEFAULT_MISSING_SKALO, help_heading = "output")]
missing: f32,
// `-d` / `--depth`.
#[arg(
short = 'd',
long,
default_value_t = DEFAULT_MAX_PATHDEPTH,
help_heading = "graph traversal"
)]
depth: usize,
// `-n` / `--indel-kmers`.
#[arg(short = 'n', long, default_value_t = DEFAULT_MAX_INDEL_KMERS, help_heading = "other")]
indel_kmers: usize,
// `--threads`: at least 1, checked by `valid_cpus`.
#[arg(long, value_parser = valid_cpus, default_value_t = 1, help_heading = "other")]
threads: usize,
},
}
pub fn cli_args() -> Args {
Args::parse()
}