use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::stdout;
use std::io::Write;
use std::path::PathBuf;
use std::{
error::Error,
fs::File,
io::{stdin, Read},
};
use clap::Parser;
use noodles::bgzf;
use crate::consts::VEP_FIELD_NAME;
use crate::{
consts::{
DEFAULT_FORMAT_DELIM, DEFAULT_INFO_PREFIX, DEFAULT_MISSING_STRING, DEFAULT_VEP_PREFIX,
},
utils::is_path_gz,
};
// Command-line options, parsed by clap's derive API.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macro turns
// `///` doc comments on the struct and its fields into `--help` text, so
// adding doc comments here would change the program's output.
//
// The clippy allow is needed because the struct carries several independent
// boolean switches, which is what `struct_excessive_bools` complains about.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Parser)]
#[command(author, version, about, long_about = None)]
pub struct Opt {
// Input file path; when absent, `setup_input` reads from stdin.
#[arg(short, long, value_name = "FILE")]
pub input: Option<PathBuf>,
// Output file path; when absent, `setup_output` writes to stdout.
#[arg(short, long, value_name = "FILE")]
pub output: Option<PathBuf>,
// Column names to exclude (one or more values); consulted by `include_column`.
#[arg(short, long, value_parser, value_name = "COLUMNS", num_args = 1..)]
pub not: Option<Vec<String>>,
// Column names to keep (one or more values); when given, `include_column`
// rejects every column that is not listed here.
#[arg(short, long, value_parser, value_name = "COLUMNS", num_args = 1..)]
pub just: Option<Vec<String>>,
// NOTE(review): not used in this section; presumably selects a stacked
// (long) output layout — confirm against the code that reads it.
#[arg(short, long)]
pub stack: bool,
// When set, `include_info_column` always keeps the info column named
// `VEP_FIELD_NAME`, bypassing the `not`/`just` filters.
#[arg(short, long)]
pub vep_fields: bool,
// NOTE(review): not used in this section; presumably switches the output
// format to CSV — confirm.
#[arg(short, long)]
pub csv: bool,
// Force the input to be read through the bgzf reader even when
// `is_path_gz` does not flag the path (the only way to do so for stdin).
#[arg(short = 'g', long)]
pub in_gz: bool,
// Force the output to be gzip-compressed even when `is_path_gz` does not
// flag the path (the only way to do so for stdout).
#[arg(short = 'z', long)]
pub out_gz: bool,
// NOTE(review): not used in this section; presumably relaxes input
// validation — confirm.
#[arg(short, long)]
pub lenient: bool,
// Prefix that `include_info_column` prepends to an info column name before
// checking it against the `not`/`just` filters.
#[arg(long, value_name = "PREFIX", default_value_t = DEFAULT_INFO_PREFIX.to_string())]
pub info_prefix: String,
// NOTE(review): not used in this section; presumably the analogous prefix
// for VEP-derived columns — confirm.
#[arg(long, value_name = "PREFIX", default_value_t = DEFAULT_VEP_PREFIX.to_string())]
pub vep_prefix: String,
// NOTE(review): not used in this section; presumably a delimiter used when
// building FORMAT-related column names — confirm.
#[arg(long, value_name = "DELIMITER", default_value_t = DEFAULT_FORMAT_DELIM.to_string())]
pub format_delim: String,
// NOTE(review): not used in this section; presumably the placeholder written
// for missing values — confirm.
#[arg(short, long, value_name = "STRING", default_value_t = DEFAULT_MISSING_STRING.to_string())]
pub missing_string: String,
}
impl Opt {
    /// Returns `true` when the column `colname` passes the `not`/`just` filters.
    ///
    /// * With neither filter set, every column is included.
    /// * A column listed in `not` is always excluded.
    /// * When `just` is set, only columns listed in it are included.
    pub fn include_column(&self, colname: &str) -> bool {
        let listed = |cols: &[String]| cols.iter().any(|c| c == colname);
        match (self.not.as_deref(), self.just.as_deref()) {
            (None, None) => true,
            (Some(not), None) => !listed(not),
            (None, Some(just)) => listed(just),
            (Some(not), Some(just)) => !listed(not) && listed(just),
        }
    }

    /// Returns `true` when the info column `colname` should be kept.
    ///
    /// The column named `VEP_FIELD_NAME` is always kept while `vep_fields` is
    /// set. Any other column is checked with [`Opt::include_column`] under
    /// its prefixed name (`info_prefix` followed by `colname`).
    pub fn include_info_column(&self, colname: &str) -> bool {
        if self.vep_fields && colname == VEP_FIELD_NAME {
            return true;
        }
        let full_colname = format!("{}{}", self.info_prefix, colname);
        self.include_column(&full_colname)
    }

    /// Opens the input stream selected by the options.
    ///
    /// Reads from `input` when given, otherwise from stdin. The stream is
    /// wrapped in a `bgzf::Reader` when `in_gz` is set or, for files, when
    /// `is_path_gz` flags the path.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the input file cannot be opened.
    pub fn setup_input(&self) -> Result<Box<dyn Read>, Box<dyn Error>> {
        Ok(match &self.input {
            Some(path) if self.in_gz || is_path_gz(path) => {
                Box::new(bgzf::Reader::new(File::open(path)?))
            }
            Some(path) => Box::new(File::open(path)?),
            None if self.in_gz => Box::new(bgzf::Reader::new(stdin())),
            None => Box::new(stdin()),
        })
    }

    /// Opens the output stream selected by the options.
    ///
    /// Writes to `output` when given (creating or truncating the file),
    /// otherwise to stdout. The stream is wrapped in a gzip encoder at the
    /// default compression level when `out_gz` is set or, for files, when
    /// `is_path_gz` flags the path.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the output file cannot be created.
    pub fn setup_output(&self) -> Result<Box<dyn Write>, Box<dyn Error>> {
        // Borrow the path instead of cloning it: `File::create` only needs a
        // reference.
        Ok(match &self.output {
            Some(path) if self.out_gz || is_path_gz(path) => {
                Box::new(GzEncoder::new(File::create(path)?, Compression::default()))
            }
            Some(path) => Box::new(File::create(path)?),
            None if self.out_gz => Box::new(GzEncoder::new(stdout(), Compression::default())),
            None => Box::new(stdout()),
        })
    }
}