use anyhow::ensure;
use clap::Args;
use log::info;
use serde::Serialize;
use std::path::PathBuf;
use strum_macros::EnumString;
use crate::cli::core::{check_optional_filename, check_required_filename, AFTER_HELP, FULL_VERSION};
use crate::parsing::noodles_helper::get_vcf_sample_name;
// Pre-set merge strategies that can be chosen with `--merge-strategy`.
//
// Each variant is a shorthand for a combination of the `enable_no_conflict` /
// `enable_voting` flags; the mapping is applied in `check_merge_settings`.
//
// NOTE: plain `//` comments are used deliberately. `///` doc comments on a
// `clap::ValueEnum` are picked up by clap as help text for the possible values
// and would therefore change the CLI output.
#[derive(
    Clone,
    Copy,
    Default,
    Debug,
    strum_macros::Display,
    EnumString,
    Serialize,
    clap::ValueEnum,
)]
pub enum MergeStrategy {
    // "exact" (the default variant): turns on neither of the optional flags.
    #[default]
    #[clap(name = "exact")]
    #[strum(ascii_case_insensitive, serialize = "exact")]
    Exact,

    // "no_conflict": turns on `enable_no_conflict`.
    #[clap(name = "no_conflict")]
    #[strum(ascii_case_insensitive, serialize = "no_conflict")]
    NoConflict,

    // "majority": turns on `enable_voting`.
    #[clap(name = "majority")]
    #[strum(ascii_case_insensitive, serialize = "majority")]
    MajorityVote,

    // "all": turns on both `enable_no_conflict` and `enable_voting`.
    #[clap(name = "all")]
    #[strum(ascii_case_insensitive, serialize = "all")]
    AllOptions,
}
#[derive(Args, Clone, Default, Serialize)]
#[clap(author, about,
after_help = &**AFTER_HELP
)]
pub struct MergeSettings {
#[clap(default_value = "")]
#[clap(hide = true)]
aardvark_version: String,
#[clap(required = true)]
#[clap(short = 'r')]
#[clap(long = "reference")]
#[clap(value_name = "FASTA")]
#[clap(help_heading = Some("Input/Output"))]
pub reference_fn: PathBuf,
#[clap(required = true)]
#[clap(short = 'i')]
#[clap(long = "input-vcf")]
#[clap(value_name = "VCF")]
#[clap(help_heading = Some("Input/Output"))]
pub vcf_filenames: Vec<PathBuf>,
#[clap(short = 's')]
#[clap(long = "vcf-sample")]
#[clap(value_name = "SAMPLE")]
#[clap(help_heading = Some("Input/Output"))]
pub vcf_samples: Vec<String>,
#[clap(short = 't')]
#[clap(long = "vcf-tag")]
#[clap(value_name = "SAMPLE")]
#[clap(help_heading = Some("Input/Output"))]
pub vcf_tags: Vec<String>,
#[clap(short = 'b')]
#[clap(long = "regions")]
#[clap(value_name = "BED")]
#[clap(help_heading = Some("Input/Output"))]
pub merge_regions: Option<PathBuf>,
#[clap(short = 'o')]
#[clap(long = "output-vcfs")]
#[clap(value_name = "DIR")]
#[clap(help_heading = Some("Input/Output"))]
pub output_vcf_folder: PathBuf,
#[clap(long = "output-summary")]
#[clap(value_name = "TSV")]
#[clap(help_heading = Some("Input/Output"))]
pub output_summary_filename: Option<PathBuf>,
#[clap(long = "output-debug")]
#[clap(value_name = "DIR")]
#[clap(help_heading = Some("Input/Output"))]
pub debug_folder: Option<PathBuf>,
#[clap(long = "min-variant-gap")]
#[clap(value_name = "BP")]
#[clap(help_heading = Some("Region generation"))]
#[clap(default_value = "50")]
pub min_variant_gap: usize,
#[clap(long = "disable-variant-trimming")]
#[clap(help_heading = Some("Region generation"))]
pub disable_variant_trimming: bool,
#[clap(long = "merge-strategy")]
#[clap(value_name = "STRAT")]
#[clap(help_heading = Some("Merge parameters"))]
pub merge_strategy: Option<MergeStrategy>,
#[clap(long = "enable-no-conflict")]
#[clap(help_heading = Some("Merge parameters"))]
pub enable_no_conflict: bool,
#[clap(long = "enable-voting")]
#[clap(help_heading = Some("Merge parameters"))]
pub enable_voting: bool,
#[clap(long = "conflict-select")]
#[clap(value_name = "INDEX")]
#[clap(help_heading = Some("Merge parameters"))]
pub conflict_selection: Option<usize>,
#[clap(long = "max-branch-factor")]
#[clap(value_name = "INT")]
#[clap(help_heading = Some("Merge parameters"))]
#[clap(default_value = "50")]
pub max_branch_factor: usize,
#[clap(long = "threads")]
#[clap(value_name = "THREADS")]
#[clap(default_value = "1")]
pub threads: usize,
#[clap(short = 'v')]
#[clap(long = "verbose")]
#[clap(action = clap::ArgAction::Count)]
pub verbosity: u8,
#[clap(hide = true)]
#[clap(long = "skip")]
#[clap(default_value = "0")]
pub skip_blocks: usize,
#[clap(hide = true)]
#[clap(long = "take")]
#[clap(default_value = "0")]
pub take_blocks: usize,
}
/// Validates the `merge` sub-command settings, fills in derived defaults, and
/// logs the effective configuration at `info` level.
///
/// The following values are filled in or normalized:
/// * `aardvark_version` is set to `FULL_VERSION`.
/// * `vcf_samples` is padded (by position) with `get_vcf_sample_name(vcf, 0)`
///   for every input VCF that has no explicit sample name.
/// * `vcf_tags` is padded (by position) with `vcf_{index}`.
/// * `enable_no_conflict` / `enable_voting` are switched on according to the
///   pre-set `merge_strategy`, if one was given. Flags that were set explicitly
///   are never switched off.
/// * `take_blocks == 0` becomes `usize::MAX` (no limit).
/// * `threads == 0` becomes `1`.
///
/// # Errors
/// Returns an error if
/// * the reference, merge-region, or any input VCF file check fails,
/// * more `--vcf-sample` or `--vcf-tag` values than input VCFs were provided,
/// * a sample name cannot be read from an input VCF,
/// * `--min-variant-gap` or `--max-branch-factor` is 0, or
/// * the `--conflict-select` index is not smaller than the number of input VCFs.
pub fn check_merge_settings(mut settings: MergeSettings) -> anyhow::Result<MergeSettings> {
    settings.aardvark_version = FULL_VERSION.clone();
    info!("Aardvark version: {:?}", &settings.aardvark_version);
    info!("Sub-command: merge");

    // ----- Inputs -----
    info!("Inputs:");
    check_required_filename(&settings.reference_fn, "Reference FASTA")?;
    info!("\tReference: {:?}", &settings.reference_fn);

    check_optional_filename(settings.merge_regions.as_deref(), "Merge regions")?;
    if let Some(hcr_fn) = settings.merge_regions.as_deref() {
        info!("\tMerge regions: {hcr_fn:?}");
    } else {
        info!("\tMerge regions: None");
    }

    // Samples and tags are matched to VCFs by position, so surplus values can
    // never be used. Reject them instead of silently ignoring a likely typo.
    let vcf_count = settings.vcf_filenames.len();
    ensure!(
        settings.vcf_samples.len() <= vcf_count,
        "--vcf-sample was provided {} times, but only {} input VCFs were given",
        settings.vcf_samples.len(),
        vcf_count
    );
    ensure!(
        settings.vcf_tags.len() <= vcf_count,
        "--vcf-tag was provided {} times, but only {} input VCFs were given",
        settings.vcf_tags.len(),
        vcf_count
    );

    for (i, i_vcf) in settings.vcf_filenames.iter().enumerate() {
        check_required_filename(i_vcf, &format!("Input VCF #{i}"))?;
        info!("\tInput VCF #{i}: {i_vcf:?}");

        // No explicit sample for this VCF: fall back to the name reported by
        // `get_vcf_sample_name` for index 0.
        if settings.vcf_samples.len() <= i {
            settings.vcf_samples.push(get_vcf_sample_name(i_vcf, 0)?);
        }
        info!("\t\tSample name: {:?}", settings.vcf_samples[i]);

        // No explicit tag for this VCF: generate a positional default.
        if settings.vcf_tags.len() <= i {
            settings.vcf_tags.push(format!("vcf_{i}"));
        }
        info!("\t\tOutput tag: {:?}", settings.vcf_tags[i]);
    }

    // ----- Outputs -----
    info!("Outputs:");
    info!("\tVCF folder: {:?}", &settings.output_vcf_folder);
    info!("\tSummary: {:?}", &settings.output_summary_filename);
    if let Some(debug_folder) = settings.debug_folder.as_ref() {
        info!("\tDebug folder: {debug_folder:?}");
    }

    // ----- Region generation -----
    info!("Region generation parameters:");
    ensure!(settings.min_variant_gap > 0, "--min-variant-gap must be >0");
    info!("\tMinimum variant gap: {}", settings.min_variant_gap);
    info!(
        "\tVariant trimming: {}",
        if settings.disable_variant_trimming { "DISABLED" } else { "ENABLED" }
    );

    // ----- Merge parameters -----
    info!("Merge parameters:");
    if let Some(merge_strat) = settings.merge_strategy {
        info!("\tPre-set merge strategy: {merge_strat}");
        // A pre-set only ever switches flags on; explicitly provided
        // `--enable-*` flags stay in effect.
        match merge_strat {
            MergeStrategy::Exact => {}
            MergeStrategy::NoConflict => {
                settings.enable_no_conflict = true;
            }
            MergeStrategy::MajorityVote => {
                settings.enable_voting = true;
            }
            MergeStrategy::AllOptions => {
                settings.enable_no_conflict = true;
                settings.enable_voting = true;
            }
        }
    } else {
        info!("\tPre-set merge strategy: None");
    }
    info!(
        "\tNo conflict blocks: {}",
        if settings.enable_no_conflict { "ENABLED" } else { "DISABLED" }
    );
    info!(
        "\tMajority voting blocks: {}",
        if settings.enable_voting { "ENABLED" } else { "DISABLED" }
    );

    ensure!(settings.max_branch_factor > 0, "--max-branch-factor must be >0");
    info!("\tMax branch factor: {}", settings.max_branch_factor);

    if let Some(v_index) = settings.conflict_selection {
        // The index is 0-based, so it must be strictly below the VCF count.
        // `vcf_tags` was padded above to one entry per VCF, which makes the
        // indexing below safe once this check has passed.
        ensure!(
            v_index < settings.vcf_filenames.len(),
            "--conflict-select index ({}) must be less than the number of provided VCFs ({})",
            v_index,
            settings.vcf_filenames.len()
        );
        info!("\tConflict selection: input #{} -> \"{}\"", v_index, settings.vcf_tags[v_index]);
    } else {
        info!("\tConflict selection: None");
    }

    // 0 means "no limit" for the hidden --take option.
    if settings.take_blocks == 0 {
        settings.take_blocks = usize::MAX;
    }

    // Always run with at least one thread.
    if settings.threads == 0 {
        settings.threads = 1;
    }
    info!("Processing threads: {}", settings.threads);

    Ok(settings)
}