tidyvcf 0.7.3

A command-line tool to convert VCF files to tab- or comma-separated tables
Documentation
use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::stdout;
use std::io::Write;
use std::path::PathBuf;
use std::{
    error::Error,
    fs::File,
    io::{stdin, Read},
};

use clap::Parser;
use noodles::bgzf;

use crate::consts::VEP_FIELD_NAME;
use crate::{
    consts::{
        DEFAULT_FORMAT_DELIM, DEFAULT_INFO_PREFIX, DEFAULT_MISSING_STRING, DEFAULT_VEP_PREFIX,
    },
    utils::is_path_gz,
};

// Command-line options for the program, parsed with clap's derive API.
//
// NOTE: the `///` comments on the fields below are consumed by clap as the
// per-option help text, so rewording them changes the `--help` output.
// Maintainer notes are therefore written as plain `//` comments.
//
// The clippy allowance is needed because each `bool` field is an independent
// command-line switch rather than encoded state.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Parser)]
#[command(author, version, about, long_about = None)]
pub struct Opt {
    // `None` makes `setup_input` read from stdin.
    /// input file, stdin if not present
    #[arg(short, long, value_name = "FILE")]
    pub input: Option<PathBuf>,

    // `None` makes `setup_output` write to stdout.
    /// output file, stdout if not present
    #[arg(short, long, value_name = "FILE")]
    pub output: Option<PathBuf>,

    // Exclusion list consulted by `include_column`; `num_args = 1..` lets a
    // single `-n` take one or more column names.
    /// columns not to include in the output
    #[arg(short, long, value_parser, value_name = "COLUMNS", num_args = 1..)]
    pub not: Option<Vec<String>>,

    // Inclusion list consulted by `include_column`. When both `not` and `just`
    // are given, a column must be in `just` and absent from `not` to be kept.
    /// just include these columns in the output
    #[arg(short, long, value_parser, value_name = "COLUMNS", num_args = 1..)]
    pub just: Option<Vec<String>>,

    /// stack samples in rows
    #[arg(short, long)]
    pub stack: bool,

    // When set, `include_info_column` always keeps the INFO field named
    // `VEP_FIELD_NAME`, regardless of the `not`/`just` lists.
    /// split VEP's CSQ string into fields
    #[arg(short, long)]
    pub vep_fields: bool,

    /// comma separated output (default is tab)
    #[arg(short, long)]
    pub csv: bool,

    // Explicit short flag: the default (first letter of the field name) would
    // be `-i`, which is already taken by `input`.
    /// input is BGZF compressed (unnecessary if `-i` has suffix `.gz`)
    #[arg(short = 'g', long)]
    pub in_gz: bool,

    // Explicit short flag: the default would be `-o`, already taken by `output`.
    /// gzip compress output (unnecessary if `-o` has suffix `.gz`)
    #[arg(short = 'z', long)]
    pub out_gz: bool,

    /// be lenient with regards to spec non-compliance
    #[arg(short, long)]
    pub lenient: bool,

    // Prepended to an INFO field name by `include_info_column` before the
    // name is checked against the `not`/`just` lists.
    /// prefix string for INFO fields to avoid name collisions
    #[arg(long, value_name = "PREFIX", default_value_t = DEFAULT_INFO_PREFIX.to_string())]
    pub info_prefix: String,

    /// prefix string for Ensembl VEP fields to avoid name collisions
    #[arg(long, value_name = "PREFIX", default_value_t = DEFAULT_VEP_PREFIX.to_string())]
    pub vep_prefix: String,

    /// delimiting character to join sample names to fmt fields
    #[arg(long, value_name = "DELIMITER", default_value_t = DEFAULT_FORMAT_DELIM.to_string())]
    pub format_delim: String,

    /// string to output when a field is missing
    #[arg(short, long, value_name = "STRING", default_value_t = DEFAULT_MISSING_STRING.to_string())]
    pub missing_string: String,
}

impl Opt {
    /// Returns `true` if the column named `colname` should appear in the output.
    ///
    /// A column is kept when it is absent from the `--not` list (if one was
    /// given) and present in the `--just` list (if one was given). With
    /// neither option set, every column is kept.
    pub fn include_column(&self, colname: &str) -> bool {
        // `[String]::contains` wants a `&String`, so compare element-wise
        // against the `&str` instead of allocating.
        let listed = |cols: &[String]| cols.iter().any(|c| c == colname);

        let excluded = self.not.as_deref().map_or(false, listed);
        let selected = self.just.as_deref().map_or(true, listed);
        !excluded && selected
    }

    /// Returns `true` if the INFO field named `colname` should appear in the
    /// output.
    ///
    /// The field named [`VEP_FIELD_NAME`] is always kept while `vep_fields`
    /// is set. Any other field is looked up in the column filters under its
    /// prefixed name (`info_prefix` followed by `colname`).
    pub fn include_info_column(&self, colname: &str) -> bool {
        if self.vep_fields && colname == VEP_FIELD_NAME {
            return true;
        }
        self.include_column(&format!("{}{}", self.info_prefix, colname))
    }

    /// Opens the configured input as a boxed reader.
    ///
    /// Reads from the `input` path when present, otherwise from stdin. The
    /// stream is wrapped in a BGZF reader when `in_gz` is set or, for a file,
    /// when `is_path_gz` accepts its path.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the input file cannot be opened.
    pub fn setup_input(&self) -> Result<Box<dyn Read>, Box<dyn Error>> {
        let reader: Box<dyn Read> = match &self.input {
            Some(path) => {
                let compressed = self.in_gz || is_path_gz(path);
                let file = File::open(path)?;
                if compressed {
                    Box::new(bgzf::Reader::new(file))
                } else {
                    Box::new(file)
                }
            }
            None if self.in_gz => Box::new(bgzf::Reader::new(stdin())),
            None => Box::new(stdin()),
        };
        Ok(reader)
    }

    /// Opens the configured output as a boxed writer.
    ///
    /// Writes to the `output` path when present (created via `File::create`),
    /// otherwise to stdout. The stream is wrapped in a gzip encoder at the
    /// default compression level when `out_gz` is set or, for a file, when
    /// `is_path_gz` accepts its path.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the output file cannot be created.
    pub fn setup_output(&self) -> Result<Box<dyn Write>, Box<dyn Error>> {
        // Borrow the path, as `setup_input` does; cloning the `PathBuf` just
        // to hand out references to it was a needless allocation.
        let writer: Box<dyn Write> = match &self.output {
            Some(path) => {
                let compressed = self.out_gz || is_path_gz(path);
                let file = File::create(path)?;
                if compressed {
                    Box::new(GzEncoder::new(file, Compression::default()))
                } else {
                    Box::new(file)
                }
            }
            None if self.out_gz => Box::new(GzEncoder::new(stdout(), Compression::default())),
            None => Box::new(stdout()),
        };
        Ok(writer)
    }
}