use core::fmt;
use clap::{ArgGroup, Args, Parser, Subcommand, ValueEnum};
use crate::DEFAULT_KMER;
use super::hashing::{AaLevel, HashType, DEFAULT_LEVEL};
/// Default for the `--single-strand` flags declared in this file (`false`, i.e. flag off).
pub const DEFAULT_STRAND: bool = false;
/// Default for the `--min-count` options declared in this file.
// NOTE(review): presumably a minimum k-mer count used when filtering read input — confirm.
pub const DEFAULT_MINCOUNT: u16 = 5;
/// Default for the `--min-qual` options declared in this file.
// NOTE(review): presumably a minimum base quality for read input — confirm.
pub const DEFAULT_MINQUAL: u8 = 20;
/// Default for the `--sketch-size` options declared in this file.
pub const DEFAULT_SKETCHSIZE: u64 = 1000;
/// Default for the `--knn` option of the inverted `precluster` subcommand.
pub const DEFAULT_KNN: usize = 50;
// Modes selectable through the `--query-type` option of the inverted `Query` subcommand.
// The human-readable description of each mode comes from the `Display` impl for this type.
// NOTE: plain `//` comments are used because clap's derive macros can turn `///` doc
// comments into help text, which would change the program's output.
#[derive(Clone, Debug, PartialEq, PartialOrd, ValueEnum, Default)]
pub enum InvertedQueryType {
// Default variant (`#[default]`); displayed as "Count of matching bins".
#[default]
MatchCount,
// Displayed as "All bins matching".
AllBins,
// Displayed as "At least one bin matching".
AnyBins,
}
impl fmt::Display for InvertedQueryType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
InvertedQueryType::MatchCount => write!(f, "Count of matching bins")?,
InvertedQueryType::AllBins => write!(f, "All bins matching")?,
InvertedQueryType::AnyBins => write!(f, "At least one bin matching")?,
}
Ok(())
}
}
/// Value parser for the `--threads` options: accepts a positive integer.
///
/// # Errors
/// Returns a message if `s` does not parse as a `usize`, or if it parses to zero.
#[doc(hidden)]
fn valid_cpus(s: &str) -> Result<usize, String> {
    match s.parse::<usize>() {
        // Zero parses fine but is not a usable thread count.
        Ok(0) => Err(String::from("Threads must be one or higher")),
        Ok(n_threads) => Ok(n_threads),
        Err(_) => Err(format!("`{s}` isn't a valid number of cores")),
    }
}
/// Logs the requested thread count and configures rayon's global thread pool with it.
///
/// A warning is logged when `threads` exceeds the core count reported by
/// `num_cpus::get()`; otherwise the chosen count is logged at info level.
/// The requested count is used for the pool in both cases.
///
/// # Panics
/// Panics if `build_global()` returns an error (the result is unwrapped).
// NOTE(review): rayon documents that `build_global` fails if the global pool was
// already initialised, so this is presumably meant to be called once — confirm.
pub fn check_and_set_threads(threads: usize) {
    let max_threads = num_cpus::get();
    if threads <= max_threads {
        log::info!("Using {threads} threads");
    } else {
        log::warn!("{threads} threads is greater than available cores {max_threads}");
    }

    let pool_builder = rayon::ThreadPoolBuilder::new().num_threads(threads);
    pool_builder.build_global().unwrap();
}
// Top-level command-line arguments: one subcommand plus two global flags.
// NOTE: plain `//` comments are used because clap's derive macros can turn `///` doc
// comments into help/about text, which would change the program's output.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(propagate_version = true)]
pub struct MainArgs {
// The subcommand selected by the user (see `Commands`).
#[doc(hidden)]
#[command(subcommand)]
pub command: Commands,
// `-v` / `--verbose` flag; `global = true` makes it available on subcommands as well.
// NOTE(review): presumably raises log verbosity — the consumer is not in this file.
#[arg(short, long, global = true)]
pub verbose: bool,
// `--quiet` flag (long form only); also global.
// NOTE(review): presumably suppresses log output — the consumer is not in this file.
#[arg(long, global = true)]
pub quiet: bool,
}
// Two alternative ways of giving k-mer sizes, flattened into the `Sketch` subcommand.
// The group is declared `required = true, multiple = false`, i.e. one of the two
// options is expected and they are not meant to be combined.
// NOTE: plain `//` comments are used because clap's derive macros can turn `///` doc
// comments into help text, which would change the program's output.
#[derive(Args)]
#[group(required = true, multiple = false)]
pub struct Kmers {
// `-k` / `--k-vals`: comma-separated list of unsigned integers (`value_delimiter = ','`).
#[arg(short, long, required = true, value_delimiter = ',')]
pub k_vals: Option<Vec<usize>>,
// `--k-seq`: also parsed as a comma-separated list of unsigned integers.
// NOTE(review): the name suggests a sequence specification (e.g. start/end/step) that
// is expanded elsewhere — confirm against the code that consumes this field.
#[arg(long, required = true, value_delimiter = ',')]
pub k_seq: Option<Vec<usize>>,
}
// Top-level subcommands of the CLI.
// NOTE: plain `//` comments are used because clap's derive macros can turn `///` doc
// comments into help/about text, which would change the program's output.
// Unless noted otherwise, a field without `short`/`long` is a positional argument,
// `short` uses the first letter of the field name and `long` its kebab-case form.
#[derive(Subcommand)]
pub enum Commands {
// `sketch`: sequence input, sketching parameters and an output name.
// The `input` group is required here, so one of `seq_files` / `file_list` must be given
// (clap groups accept a single member unless `multiple(true)` is set).
#[command(group(
ArgGroup::new("input")
.required(true)
.args(["seq_files", "file_list"]),
))]
Sketch {
// Positional list of input paths (zero or more values); member of the `input` group.
#[arg(group = "input")]
seq_files: Option<Vec<String>>,
// `-f`: single string; member of the `input` group.
// NOTE(review): presumably the path of a file listing the inputs — confirm.
#[arg(short, group = "input")]
file_list: Option<String>,
// `--concat-fasta` flag, off by default.
#[arg(long, default_value_t = false)]
concat_fasta: bool,
// `--convert-pdb` flag, off by default; only compiled with the `3di` cargo feature.
#[cfg(feature = "3di")]
#[arg(long, default_value_t = false)]
convert_pdb: bool,
// `-o`: output name; mandatory because the field is a plain `String` with no default.
#[arg(short)]
output: String,
// K-mer size options, flattened in from `Kmers` (`-k`/`--k-vals` or `--k-seq`).
#[command(flatten)]
kmers: Kmers,
// `-s` / `--sketch-size`, defaults to `DEFAULT_SKETCHSIZE`.
#[arg(short, long, default_value_t = DEFAULT_SKETCHSIZE)]
sketch_size: u64,
// `--seq-type`: one of the `HashType` values, defaults to `HashType::DNA`.
#[arg(long, value_enum, default_value_t = HashType::DNA)]
seq_type: HashType,
// `--level`: one of the `AaLevel` values, defaults to `DEFAULT_LEVEL`.
#[arg(long, value_enum, default_value_t = DEFAULT_LEVEL)]
level: AaLevel,
// `--single-strand` flag, defaults to `DEFAULT_STRAND`.
#[arg(long, default_value_t = DEFAULT_STRAND)]
single_strand: bool,
// `--min-count`, defaults to `DEFAULT_MINCOUNT`.
#[arg(long, default_value_t = DEFAULT_MINCOUNT)]
min_count: u16,
// `--min-qual`, defaults to `DEFAULT_MINQUAL`.
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
min_qual: u8,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `dist`: takes a reference database and either a query database or `--knn`.
Dist {
// Required positional: the reference database.
#[arg(required = true)]
ref_db: String,
// Optional positional; shares the `query` group with `knn`, so the two are
// alternatives (the group is not marked required).
#[arg(group = "query")]
query_db: Option<String>,
// `-o`: optional output name.
// NOTE(review): behaviour when omitted is decided by the caller — not visible here.
#[arg(short)]
output: Option<String>,
// `--knn`: optional count; member of the `query` group.
#[arg(long, group = "query")]
knn: Option<usize>,
// `--subset`: optional string.
// NOTE(review): presumably a file naming the samples to keep — confirm.
#[arg(long)]
subset: Option<String>,
// `-k`: optional single k-mer size.
#[arg(short)]
kmer: Option<usize>,
// `--ani` flag, off by default; declared with `requires("kmer")`.
#[arg(long, requires("kmer"), default_value_t = false)]
ani: bool,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
// `--completeness-file`: optional string.
#[arg(long)]
completeness_file: Option<String>,
// `--completeness-cutoff`, defaults to 0.64.
#[arg(long, default_value_t = 0.64)]
completeness_cutoff: f64,
},
// `inverted`: container for the nested `InvertedCommands` subcommands.
Inverted {
#[command(subcommand)]
command: InvertedCommands,
},
// `merge`: two input databases and an output name, all required.
Merge {
// First required positional.
#[arg(required = true)]
db1: String,
// Second required positional.
#[arg(required = true)]
db2: String,
// `-o`: required output name.
#[arg(required = true, short)]
output: String,
},
// `append`: an existing database plus new sequence input and an output name.
Append {
// Required positional: the existing database.
#[arg(required = true)]
db: String,
// Positional list of input paths; member of the `input` group.
// NOTE(review): unlike `Sketch`, no `required` is set on this group here, so the
// parser accepts neither input being given — confirm this is handled downstream.
#[arg(group = "input")]
seq_files: Option<Vec<String>>,
// `-f`: single string; member of the `input` group.
#[arg(short, group = "input")]
file_list: Option<String>,
// `-o`: required output name.
#[arg(required = true, short)]
output: String,
// `--single-strand` flag, defaults to `DEFAULT_STRAND`.
#[arg(long, default_value_t = DEFAULT_STRAND)]
single_strand: bool,
// `--min-count`, defaults to `DEFAULT_MINCOUNT`.
#[arg(long, default_value_t = DEFAULT_MINCOUNT)]
min_count: u16,
// `--min-qual`, defaults to `DEFAULT_MINQUAL`.
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
min_qual: u8,
// `--concat-fasta` flag, off by default.
#[arg(long, default_value_t = false)]
concat_fasta: bool,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
// `--level`: one of the `AaLevel` values, defaults to `DEFAULT_LEVEL`.
#[arg(long, value_enum, default_value_t = DEFAULT_LEVEL)]
level: AaLevel,
},
// `delete`: three required positionals, in this order.
Delete {
// The database to modify.
#[arg(required = true)]
db: String,
// NOTE(review): presumably a file listing the samples to remove — confirm.
#[arg(required = true)]
samples: String,
// Name for the output.
#[arg(required = true)]
output_file: String,
},
// `info`: inspects a single file.
Info {
// Positional; mandatory because the field is a plain `String` with no default.
skm_file: String,
// `--sample-info` flag, off by default.
#[arg(long, default_value_t = false)]
sample_info: bool,
},
}
// Subcommands nested under `Commands::Inverted`.
// NOTE: plain `//` comments are used because clap's derive macros can turn `///` doc
// comments into help/about text, which would change the program's output.
// Unless noted otherwise, a field without `short`/`long` is a positional argument,
// `short` uses the first letter of the field name and `long` its kebab-case form.
#[derive(Subcommand)]
pub enum InvertedCommands {
// `build`: sequence input, sketching parameters and an output name.
Build {
// Positional list of input paths; member of the `input` group.
// NOTE(review): the group is not marked required here, so the parser accepts
// neither input being given — confirm this is handled downstream.
#[arg(group = "input")]
seq_files: Option<Vec<String>>,
// `-f`: single string; member of the `input` group.
#[arg(short, group = "input")]
file_list: Option<String>,
// `-o`: required output name.
#[arg(required = true, short)]
output: String,
// `--write-skq` flag (off unless given).
#[arg(long)]
write_skq: bool,
// `--species-names`: optional string.
// NOTE(review): presumably a file mapping samples to species labels — confirm.
#[arg(long)]
species_names: Option<String>,
// `-s` / `--sketch-size`, defaults to `DEFAULT_SKETCHSIZE`.
#[arg(short, long, default_value_t = DEFAULT_SKETCHSIZE)]
sketch_size: u64,
// `-k` / `--kmer-length`, defaults to the crate-level `DEFAULT_KMER`.
#[arg(short, long, default_value_t = DEFAULT_KMER)]
kmer_length: usize,
// `--single-strand` flag, defaults to `DEFAULT_STRAND`.
#[arg(long, default_value_t = DEFAULT_STRAND)]
single_strand: bool,
// `--min-count`, defaults to `DEFAULT_MINCOUNT`.
#[arg(long, default_value_t = DEFAULT_MINCOUNT)]
min_count: u16,
// `--min-qual`, defaults to `DEFAULT_MINQUAL`.
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
min_qual: u8,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `query`: an existing index plus sequence input to query it with.
Query {
// Required positional: the index to query.
#[arg(required = true)]
ski: String,
// Positional list of input paths; member of the `input` group (not marked required).
#[arg(group = "input")]
seq_files: Option<Vec<String>>,
// `-f`: single string; member of the `input` group.
#[arg(short, group = "input")]
file_list: Option<String>,
// `-o`: optional output name.
// NOTE(review): behaviour when omitted is decided by the caller — not visible here.
#[arg(short)]
output: Option<String>,
// `--query-type`: one of the `InvertedQueryType` values, defaults to `MatchCount`.
#[arg(long, value_enum, default_value_t = InvertedQueryType::MatchCount)]
query_type: InvertedQueryType,
// `--min-count`, defaults to `DEFAULT_MINCOUNT`.
#[arg(long, default_value_t = DEFAULT_MINCOUNT)]
min_count: u16,
// `--min-qual`, defaults to `DEFAULT_MINQUAL`.
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
min_qual: u8,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
},
// `precluster`: the `mode` group is required, so one of `--skd` / `--count` must be
// given (clap groups accept a single member unless `multiple(true)` is set).
#[command(group(
ArgGroup::new("mode")
.required(true)
.args(["skd", "count"]),
))]
Precluster {
// Required positional: the index to use.
#[arg(required = true)]
ski: String,
// `--skd`: optional string; member of the `mode` group.
#[arg(long, group = "mode")]
skd: Option<String>,
// `-o`: optional output name.
#[arg(short)]
output: Option<String>,
// `--count` flag; member of the `mode` group.
#[arg(long, group = "mode")]
count: bool,
// `--knn`, defaults to `DEFAULT_KNN`.
#[arg(long, default_value_t = DEFAULT_KNN)]
knn: usize,
// `--ani` flag, off by default.
#[arg(long, default_value_t = false)]
ani: bool,
// `--threads`: validated by `valid_cpus` (must be >= 1), defaults to 1.
#[arg(long, value_parser = valid_cpus, default_value_t = 1)]
threads: usize,
// `--completeness-file`: optional string.
#[arg(long)]
completeness_file: Option<String>,
// `--completeness-cutoff`, defaults to 0.64.
#[arg(long, default_value_t = 0.64)]
completeness_cutoff: f64,
},
}
pub fn cli_args() -> MainArgs {
MainArgs::parse()
}