use indexmap::IndexSet;
use noodles::core::position::Position;
use noodles::vcf::variant::record::samples::series::value::genotype::Phasing;
use noodles::vcf::variant::record::samples::series::value::Genotype;
use noodles::vcf::variant::{self, RecordBuf};
use std::error::Error;
use std::path::Path;
use crate::cli::Opt;
use crate::consts::VEP_DESC_PREFIX;
use crate::consts::VEP_FIELD_NAME;
/// Splits the tail of a header description string into `|`-separated names.
///
/// The first `VEP_DESC_PREFIX.len()` bytes of `csq` are dropped without being
/// compared against the prefix itself; whatever follows is split on `|`.
///
/// # Panics
///
/// Panics if `csq` is shorter than `VEP_DESC_PREFIX`, or if the prefix length
/// does not fall on a UTF-8 character boundary of `csq`.
pub fn split_header_csq_fields(csq: &str) -> Vec<String> {
    let names = &csq[VEP_DESC_PREFIX.len()..];
    names.split('|').map(String::from).collect()
}
/// Renders the value stored under `key` in a single sample as a string.
///
/// Returns a copy of `missing_string` when the sample has no entry for `key`
/// or the entry carries no value.
///
/// # Errors
///
/// Propagates any error raised while formatting the value.
pub fn get_fmt_field(
    key: &String,
    sample: &variant::record_buf::samples::Sample,
    missing_string: &String,
) -> Result<String, Box<dyn Error>> {
    match sample.get(key) {
        Some(Some(value)) => fmt_field_string(value),
        _ => Ok(missing_string.clone()),
    }
}
/// Returns `true` when the extension of `path` is exactly `gz`.
///
/// The comparison is case-sensitive, and a path without an extension yields
/// `false`.
pub fn is_path_gz(path: &Path) -> bool {
    path.extension().map_or(false, |ext| ext == "gz")
}
/// Returns an owned copy of the contig name.
pub fn contig_string(contig: &str) -> String {
    String::from(contig)
}
/// Appends every requested per-sample field of `record` to `fields`.
///
/// Samples are visited in the order the record yields them; within each
/// sample the keys are visited in `fmt_keys` order. A key without a value in a
/// sample contributes `opt.missing_string`.
///
/// # Errors
///
/// Stops at the first value that fails to format and returns that error.
/// Entries pushed before the failure remain in `fields`.
pub fn push_fmts_cartesian(
    fields: &mut Vec<String>,
    record: &RecordBuf,
    fmt_keys: &Vec<&String>,
    opt: &Opt,
) -> Result<(), Box<dyn Error>> {
    for sample in record.samples().values() {
        for &key in fmt_keys.iter() {
            let rendered = get_fmt_field(key, &sample, &opt.missing_string)?;
            fields.push(rendered);
        }
    }
    Ok(())
}
/// Builds one row of per-sample fields for each name in `samples`.
///
/// The returned vector always has `samples.len()` rows. The record's samples
/// are paired positionally with the names in `samples`; each paired row holds
/// the values for `fmt_keys` (in order, with `opt.missing_string` standing in
/// for absent values) followed by the sample name. When the record yields
/// fewer samples than there are names, the unpaired trailing rows stay empty.
///
/// # Errors
///
/// Returns the first error raised while formatting a value.
pub fn push_fmts_stacked(
    record: &RecordBuf,
    fmt_keys: &Vec<&String>,
    opt: &Opt,
    samples: &IndexSet<String>,
) -> Result<Vec<Vec<String>>, Box<dyn Error>> {
    let mut rows: Vec<Vec<String>> = vec![Vec::new(); samples.len()];
    // No up-front emptiness check is needed: zipping with an empty side simply
    // produces no iterations and every row is left empty.
    for ((sample_fmt, sample), row) in
        record.samples().values().zip(samples).zip(rows.iter_mut())
    {
        for key in fmt_keys {
            row.push(get_fmt_field(key, &sample_fmt, &opt.missing_string)?);
        }
        row.push(sample.clone());
    }
    Ok(rows)
}
/// Converts a single INFO value to its textual form.
///
/// Flags render as the literal `true`; list values are delegated to
/// `info_array_string`; all other values use their `to_string` output.
fn info_field_string(field: &variant::record_buf::info::field::Value) -> String {
    type Value = variant::record_buf::info::field::Value;

    match field {
        Value::Flag => String::from("true"),
        Value::Array(values) => info_array_string(values),
        Value::String(text) => text.to_owned(),
        Value::Character(ch) => ch.to_string(),
        Value::Float(number) => number.to_string(),
        Value::Integer(number) => number.to_string(),
    }
}
/// Renders a genotype as allele indices joined by its phasing separator.
///
/// Alleles are joined with `|` when the second allele is phased and with `/`
/// otherwise; a single-allele genotype is rendered as just that allele. A
/// missing allele is always written as `.`, so an uncalled diploid genotype
/// comes out as `./.` rather than being mistaken for `0/0`.
///
/// # Errors
///
/// Returns an error if the alleles cannot be read, if the genotype contains no
/// alleles at all, or if the alleles do not all share the same phasing.
fn genotype_string(
    g: &variant::record_buf::samples::sample::value::Genotype,
) -> Result<String, Box<dyn Error>> {
    let alleles: Vec<(Option<usize>, Phasing)> = g.iter().collect::<Result<Vec<_>, _>>()?;
    // Report an empty genotype as an error instead of indexing out of bounds.
    if alleles.is_empty() {
        return Err(Box::from("Genotype contains no alleles!"));
    }
    if !alleles.windows(2).all(|pair| pair[0].1 == pair[1].1) {
        return Err(Box::from(
            "Thought unreachable, inconsistent phasing between alleles in genotype!",
        ));
    }
    // With a single allele there is nothing to join, so the separator is unused.
    let separator = match alleles.get(1) {
        Some((_, Phasing::Phased)) => "|",
        _ => "/",
    };
    let indices = alleles
        .iter()
        .map(|(position, _)| match position {
            Some(index) => index.to_string(),
            None => ".".to_string(),
        })
        .collect::<Vec<String>>()
        .join(separator);
    Ok(indices)
}
/// Converts a single per-sample value to its textual form.
///
/// Genotypes go through `genotype_string`, list values through
/// `fmt_array_string`, and everything else uses its `to_string` output.
///
/// # Errors
///
/// Only genotype values can fail; their error is passed through unchanged.
fn fmt_field_string(
    field: &variant::record_buf::samples::sample::Value,
) -> Result<String, Box<dyn Error>> {
    type Value = variant::record_buf::samples::sample::Value;

    let rendered = match field {
        Value::Genotype(genotype) => return genotype_string(genotype),
        Value::Array(values) => fmt_array_string(values),
        Value::String(text) => text.to_owned(),
        Value::Character(ch) => ch.to_string(),
        Value::Float(number) => number.to_string(),
        Value::Integer(number) => number.to_string(),
    };
    Ok(rendered)
}
/// Renders a list-valued INFO field as a comma-separated string, whatever its
/// element type; absent elements are written as `.`.
fn info_array_string(array: &variant::record_buf::info::field::value::Array) -> String {
    type Array = variant::record_buf::info::field::value::Array;

    match array {
        Array::String(items) => vec_option_string(items),
        Array::Character(items) => vec_option_string(items),
        Array::Float(items) => vec_option_string(items),
        Array::Integer(items) => vec_option_string(items),
    }
}
/// Renders a list-valued per-sample field as a comma-separated string,
/// whatever its element type; absent elements are written as `.`.
fn fmt_array_string(array: &variant::record_buf::samples::sample::value::Array) -> String {
    type Array = variant::record_buf::samples::sample::value::Array;

    match array {
        Array::String(items) => vec_option_string(items),
        Array::Character(items) => vec_option_string(items),
        Array::Float(items) => vec_option_string(items),
        Array::Integer(items) => vec_option_string(items),
    }
}
/// Joins the elements of `vec` with commas, writing `.` for every `None`.
///
/// An empty slice produces an empty string.
fn vec_option_string<T: ToString>(vec: &[Option<T>]) -> String {
    let mut rendered = String::new();
    for (index, element) in vec.iter().enumerate() {
        if index > 0 {
            rendered.push(',');
        }
        match element {
            Some(value) => rendered.push_str(&value.to_string()),
            None => rendered.push('.'),
        }
    }
    rendered
}
/// Appends the string form of each requested INFO field to `fields`.
///
/// For every key in `info_keys`, in order:
///
/// * If `split_csq` is non-zero and the key equals `VEP_FIELD_NAME`, the value
///   is split on `|` into at most `split_csq` pieces (the final piece keeps any
///   remaining `|` characters), padded with `opt.missing_string` up to exactly
///   `split_csq` entries, and all of them are appended. A list-valued field has
///   its present elements joined with `|` before splitting. An absent field
///   contributes `split_csq` copies of `opt.missing_string`.
/// * Otherwise a single entry is appended: the rendered value, or
///   `opt.missing_string` when the key is absent or has no value.
///
/// # Panics
///
/// Panics if the field being split holds anything other than a string or a
/// list of strings.
pub fn push_info_fields(
    info_keys: &Vec<&String>,
    info: &variant::record_buf::Info,
    fields: &mut Vec<String>,
    split_csq: usize,
    opt: &Opt,
) {
    type Value = variant::record_buf::info::field::Value;
    type Array = variant::record_buf::info::field::value::Array;

    let split = |text: &str| -> Vec<String> {
        text.splitn(split_csq, '|').map(ToString::to_string).collect()
    };

    for info_field in info_keys {
        if split_csq != 0 && *info_field == VEP_FIELD_NAME {
            let mut subfields: Vec<String> = match info.get(*info_field) {
                Some(Some(Value::String(s))) => split(s),
                Some(Some(Value::Array(Array::String(sa)))) => {
                    // Join without a trailing separator so a list with a single
                    // entry splits exactly like the plain-string form, rather
                    // than leaving a stray `|` or an extra empty piece at the end.
                    let joined = sa
                        .iter()
                        .flatten()
                        .map(String::as_str)
                        .collect::<Vec<&str>>()
                        .join("|");
                    split(&joined)
                }
                Some(Some(_)) => panic!("CSQ field not string(s)!"),
                _ => Vec::new(),
            };
            // `splitn` never yields more than `split_csq` pieces, so this only
            // ever pads the row out to the expected width.
            subfields.resize(split_csq, opt.missing_string.to_owned());
            fields.append(&mut subfields);
        } else {
            fields.push(match info.get(*info_field) {
                Some(Some(field)) => info_field_string(field),
                _ => opt.missing_string.to_owned(),
            });
        }
    }
}
/// Renders the variant start position as a decimal string, using `0` when the
/// position is absent.
fn variant_start_string(pos: Option<Position>) -> String {
    let start = pos.map_or(0, |p| p.get());
    start.to_string()
}
/// Collects the fixed columns of `record` that `opt` asks for.
///
/// Columns are emitted in the order contig, pos, id, ref, alt, qual, filter;
/// each one is included only when `opt.include_column` returns `true` for its
/// name. Multi-valued columns (id, alt, filter) are joined with commas, an
/// absent quality score becomes `opt.missing_string`, and a passing filter set
/// is written as `PASS`.
pub fn main_fields_from_record(record: &RecordBuf, opt: &Opt) -> Vec<String> {
    let mut columns: Vec<String> = Vec::new();

    if opt.include_column("contig") {
        let contig = contig_string(record.reference_sequence_name());
        columns.push(contig);
    }

    if opt.include_column("pos") {
        let start = variant_start_string(record.variant_start());
        columns.push(start);
    }

    if opt.include_column("id") {
        let ids: Vec<String> = record
            .ids()
            .as_ref()
            .iter()
            .map(ToString::to_string)
            .collect();
        columns.push(ids.join(","));
    }

    if opt.include_column("ref") {
        let reference = record.reference_bases().to_owned();
        columns.push(reference);
    }

    if opt.include_column("alt") {
        let alternates = record.alternate_bases().as_ref().join(",");
        columns.push(alternates);
    }

    if opt.include_column("qual") {
        let quality = match record.quality_score() {
            Some(qual) => qual.to_string(),
            None => opt.missing_string.to_owned(),
        };
        columns.push(quality);
    }

    if opt.include_column("filter") {
        let filter_set = record.filters();
        if filter_set.is_pass() {
            columns.push("PASS".to_string());
        } else {
            let names: Vec<String> = filter_set
                .as_ref()
                .iter()
                .map(ToString::to_string)
                .collect();
            columns.push(names.join(","));
        }
    }

    columns
}
// Unit-test module, compiled only for test builds; it currently defines no tests.
#[cfg(test)]
mod tests {}