use std::str::FromStr;
use noodles_vcf::record::info::field;
use super::gnomad3;
use crate::common;
include!(concat!(env!("OUT_DIR"), "/annonars.gnomad.gnomad4.rs"));
include!(concat!(
env!("OUT_DIR"),
"/annonars.gnomad.gnomad4.serde.rs"
));
pub static COHORTS: &[&str] = &[
"joint", "non_ukb", ];
pub static GRPS: &[&str] = &[
"afr",
"ami", "amr",
"asj",
"eas",
"fin",
"mid",
"nfe",
"remaining",
"sas",
];
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RecordType {
Genomes,
Exomes,
}
impl Record {
pub fn from_vcf_allele(
record: &noodles_vcf::record::Record,
allele_no: usize,
options: &gnomad3::DetailsOptions,
record_type: RecordType,
) -> Result<Self, anyhow::Error> {
assert!(allele_no == 0, "only allele 0 is supported");
assert!(allele_no == 0, "only allele 0 is supported");
let chrom = record.chromosome().to_string();
let pos: usize = record.position().into();
let pos = pos as i32;
let ref_allele = record.reference_bases().to_string();
let alt_allele = record
.alternate_bases()
.get(allele_no)
.ok_or_else(|| anyhow::anyhow!("no such allele: {}", allele_no))?
.to_string();
let filters = gnomad3::Record::extract_filters(record)?;
let allele_counts = Self::extract_cohorts_allele_counts(record, record_type)?;
let nonpar = common::noodles::get_flag(record, "non_par").unwrap_or_default();
let outside_broad_capture_region =
common::noodles::get_flag(record, "outside_broad_capture_region").unwrap_or_default();
let outside_ukb_capture_region =
common::noodles::get_flag(record, "outside_ukb_capture_region").unwrap_or_default();
let sibling_singleton =
common::noodles::get_flag(record, "sibling_singleton").unwrap_or_default();
let only_het = common::noodles::get_flag(record, "only_het").unwrap_or_default();
let vep = options
.vep
.then(|| Self::extract_vep(record))
.transpose()?
.unwrap_or_default();
let effect_info = options
.effect_info
.then(|| Self::extract_effect_info(record))
.transpose()?;
let variant_info = options
.var_info
.then(|| gnomad3::Record::extract_variant_info(record))
.transpose()?;
let quality_info = options
.quality
.then(|| gnomad3::Record::extract_quality(record))
.transpose()?;
let age_info = options
.age_hists
.then(|| gnomad3::Record::extract_age(record))
.transpose()?;
let depth_info = options
.depth_details
.then(|| gnomad3::Record::extract_depth(record))
.transpose()?;
let vrs_info = options
.var_info
.then(|| Self::extract_vrs_info(record))
.transpose()?;
Ok(Self {
chrom,
pos,
ref_allele,
alt_allele,
filters,
vep,
allele_counts,
effect_info,
nonpar,
only_het,
outside_broad_capture_region,
outside_ukb_capture_region,
sibling_singleton,
variant_info,
quality_info,
age_info,
depth_info,
vrs_info,
})
}
fn extract_vep(
record: &noodles_vcf::Record,
) -> Result<Vec<super::vep_gnomad4::Vep>, anyhow::Error> {
if let Some(Some(field::Value::Array(field::value::Array::String(v)))) =
record.info().get(&field::Key::from_str("vep")?)
{
v.iter()
.flat_map(|v| {
if let Some(s) = v.as_ref() {
if s.matches('|').count() + 1 == super::vep_gnomad4::Vep::num_fields() {
Some(super::vep_gnomad4::Vep::from_str(s))
} else {
None
}
} else {
None
}
})
.collect::<Result<Vec<_>, _>>()
} else {
anyhow::bail!("missing INFO/vep in gnomAD-nuclear record")
}
}
fn extract_vrs_info(record: &noodles_vcf::Record) -> Result<VrsInfo, anyhow::Error> {
Ok(VrsInfo {
allele_ids: common::noodles::get_vec_str(record, "VRS_Allele_IDs").unwrap_or_default(),
ends: common::noodles::get_vec_i32(record, "VRS_Ends").unwrap_or_default(),
starts: common::noodles::get_vec_i32(record, "VRS_Starts").unwrap_or_default(),
states: common::noodles::get_vec_str(record, "VRS_States").unwrap_or_default(),
})
}
fn extract_effect_info(record: &noodles_vcf::Record) -> Result<EffectInfo, anyhow::Error> {
Ok(EffectInfo {
pangolin_largest_ds: common::noodles::get_f32(record, "pangolin_largest_ds").ok(),
phylop: common::noodles::get_f32(record, "phylop").ok(),
polyphen_max: common::noodles::get_f32(record, "polyphen_max").ok(),
revel_max: common::noodles::get_f32(record, "revel_max").ok(),
sift_max: common::noodles::get_f32(record, "sift_max").ok(),
spliceai_ds_max: common::noodles::get_f32(record, "spliceai_ds_max").ok(),
cadd_raw: common::noodles::get_f32(record, "cadd_raw").ok(),
cadd_phred: common::noodles::get_f32(record, "cadd_phred").ok(),
})
}
fn extract_cohorts_allele_counts(
record: &noodles_vcf::Record,
record_type: RecordType,
) -> Result<Vec<CohortAlleleCounts>, anyhow::Error> {
let mut global_counts = CohortAlleleCounts {
cohort: None,
by_sex: Some(gnomad3::AlleleCountsBySex {
overall: Some(Self::extract_allele_counts(record, "", "")?),
xx: Some(Self::extract_allele_counts(record, "", "_XX")?),
xy: Some(Self::extract_allele_counts(record, "", "_XY")?),
}),
raw: Some(Self::extract_allele_counts(record, "", "_raw")?),
grpmax: common::noodles::get_string(record, "grpmax").ok(),
af_grpmax: common::noodles::get_f32(record, "AF_grpmax").ok(),
ac_grpmax: common::noodles::get_i32(record, "AC_grpmax").ok(),
an_grpmax: common::noodles::get_i32(record, "AN_grpmax").ok(),
nhomalt_grpmax: common::noodles::get_i32(record, "nhomalt_grpmax").ok(),
by_ancestry_group: Vec::new(), };
for pop in GRPS {
if record_type == RecordType::Exomes && pop == &"ami" {
continue;
}
global_counts
.by_ancestry_group
.push(Self::extract_ancestry_group_allele_counts(record, "", pop)?);
}
let mut result = Vec::new();
for cohort in COHORTS {
let infix = format!("_{}", cohort);
let mut cohort_counts = CohortAlleleCounts {
cohort: Some(cohort.to_string()),
by_sex: Some(gnomad3::AlleleCountsBySex {
overall: Some(Self::extract_allele_counts(record, &infix, "")?),
xx: Some(Self::extract_allele_counts(record, &infix, "_XX")?),
xy: Some(Self::extract_allele_counts(record, &infix, "_XY")?),
}),
raw: Some(Self::extract_allele_counts(record, &infix, "_raw")?),
grpmax: common::noodles::get_string(record, &format!("grpmax_{}", cohort)).ok(),
af_grpmax: common::noodles::get_f32(record, &format!("AF_grpmax_{}", cohort)).ok(),
ac_grpmax: common::noodles::get_i32(record, &format!("AC_grpmax_{}", cohort)).ok(),
an_grpmax: common::noodles::get_i32(record, &format!("AN_grpmax_{}", cohort)).ok(),
nhomalt_grpmax: common::noodles::get_i32(
record,
&format!("nhomalt_grpmax_{}", cohort),
)
.ok(),
by_ancestry_group: Vec::new(), };
for pop in GRPS {
if record_type == RecordType::Exomes && pop == &"ami" {
continue;
}
cohort_counts
.by_ancestry_group
.push(Self::extract_ancestry_group_allele_counts(
record, &infix, pop,
)?);
}
result.push(cohort_counts);
}
result.insert(1, global_counts);
Ok(result)
}
fn extract_ancestry_group_allele_counts(
record: &noodles_vcf::Record,
infix: &str,
grp: &str,
) -> Result<AncestryGroupAlleleCounts, anyhow::Error> {
Ok(AncestryGroupAlleleCounts {
ancestry_group: grp.to_string(),
counts: Some(gnomad3::AlleleCountsBySex {
overall: Some(Self::extract_allele_counts(
record,
infix,
&format!("_{}", grp),
)?),
xx: Some(Self::extract_allele_counts(
record,
infix,
&format!("_{}_XX", grp),
)?),
xy: Some(Self::extract_allele_counts(
record,
infix,
&format!("_{}_XY", grp),
)?),
}),
faf95: common::noodles::get_f32(record, &format!("faf95_{}", grp)).ok(),
faf99: common::noodles::get_f32(record, &format!("faf99_{}", grp)).ok(),
faf95_xx: common::noodles::get_f32(record, &format!("faf95_{}_XX", grp)).ok(),
faf99_xx: common::noodles::get_f32(record, &format!("faf99_{}_XX", grp)).ok(),
faf95_xy: common::noodles::get_f32(record, &format!("faf95_{}_XY", grp)).ok(),
faf99_xy: common::noodles::get_f32(record, &format!("faf99_{}_XY", grp)).ok(),
})
}
fn extract_allele_counts(
record: &noodles_vcf::Record,
infix: &str,
suffix: &str,
) -> Result<gnomad3::AlleleCounts, anyhow::Error> {
Ok(gnomad3::AlleleCounts {
ac: common::noodles::get_i32(record, &format!("AC{}{}", infix, suffix))
.unwrap_or_default(),
an: common::noodles::get_i32(record, &format!("AN{}{}", infix, suffix))
.unwrap_or_default(),
nhomalt: common::noodles::get_i32(record, &format!("nhomalt{}{}", infix, suffix))
.unwrap_or_default(),
af: common::noodles::get_f32(record, &format!("AF{}{}", infix, suffix))
.unwrap_or_default(),
})
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_record_from_vcf_allele_gnomad_genomes_grch38() -> Result<(), anyhow::Error> {
let path_vcf = "tests/gnomad-nuclear/example-genomes-grch38/v4.0/gnomad-genomes.vcf";
let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?;
let header = reader_vcf.read_header()?;
let mut records = Vec::new();
for row in reader_vcf.records(&header) {
let vcf_record = row?;
let record = Record::from_vcf_allele(
&vcf_record,
0,
&gnomad3::DetailsOptions::with_all_enabled(),
RecordType::Genomes,
)?;
records.push(record);
}
insta::assert_yaml_snapshot!(records);
Ok(())
}
#[test]
fn test_record_from_vcf_allele_gnomad_exomess_grch38() -> Result<(), anyhow::Error> {
let path_vcf = "tests/gnomad-nuclear/example-exomes-grch38/v4.0/gnomad-exomes.vcf";
let mut reader_vcf = noodles_vcf::reader::Builder::default().build_from_path(path_vcf)?;
let header = reader_vcf.read_header()?;
let mut records = Vec::new();
for row in reader_vcf.records(&header) {
let vcf_record = row?;
let record = Record::from_vcf_allele(
&vcf_record,
0,
&gnomad3::DetailsOptions::with_all_enabled(),
RecordType::Exomes,
)?;
records.push(record);
}
insta::assert_yaml_snapshot!(records);
Ok(())
}
}